[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
blob8196ce6c328b4e1b3d6c7484a08a2d08582248a1
1 import functools
2 import json
3 import random
4 import re
5 import urllib.parse
7 from .common import InfoExtractor
8 from .periscope import PeriscopeBaseIE, PeriscopeIE
9 from ..networking.exceptions import HTTPError
10 from ..utils import (
11 ExtractorError,
12 dict_get,
13 filter_dict,
14 float_or_none,
15 format_field,
16 int_or_none,
17 join_nonempty,
18 make_archive_id,
19 remove_end,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
class TwitterBaseIE(InfoExtractor):
    """Shared request, login and format helpers for the Twitter/X extractors.

    Subclasses that use the GraphQL API must override
    `_build_graphql_query`.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header (see _set_base_headers)
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api after every login step and echoed back
    # in the next login request body
    _flow_token = None

    # Request body for the first call of the login flow (see _perform_login)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        `variant` is a mapping with a 'url' key and optionally a
        'bitrate' or 'bit_rate' key.

        Returns a `(formats, subtitles)` tuple; both are empty when the
        variant has no URL.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only HLS renditions without a bitrate: recover it from
            # the numeric suffix of the format ID
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs
        else:
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': join_nonempty('http', tbr),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and extract formats from it.

        Every `videoVariant` element is processed, followed by the
        `MediaFile` URL if it was not already among the variants.

        Returns a `(formats, subtitles)` tuple; both are empty when
        `vmap_url` is not a valid URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded in the document
            video_variant.attrib['url'] = urllib.parse.unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # A missing MediaFile yields no formats, so skip the call outright
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width' and 'height' on `a_format` (in place) if `video_url`
        contains a `/<width>x<height>/` path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an 'auth_token' cookie is set for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        """Value of the 'api' extractor argument (default: 'graphql')."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from the API.

        Raises ExtractorError if the response contains no token.
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Return the Authorization header plus, when a 'ct0' cookie is
        available, the matching 'x-csrf-token' header.

        The legacy bearer token is only used when `legacy` is set and the
        session is not logged in.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow.

        Stores the returned flow token on the instance and returns the ID
        of the next subtask.

        Raises ExtractorError if the API reports an error, the status is
        not 'success', or no subtask is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in with username and password by answering the subtasks of
        the login flow until the auth cookie is set.

        Does nothing if already logged in. Raises ExtractorError (or a
        login-required error for captcha/rejected attempts) on failure.
        """
        if self.is_logged_in:
            return

        guest_token = self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://x.com/',
            'Origin': 'https://x.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the latest token
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs,
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link',
                },
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username,
                                    },
                                },
                            }],
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false',
                        },
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Still inside the loop, so the auth cookie was never set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Request `path` from the GraphQL or legacy API and return the JSON.

        Uses session headers when logged in, otherwise a fresh guest token.

        Raises a login-required error when the API reports the request as
        not authorized, and ExtractorError for any other reported error.
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id),
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys de-duplicates while keeping the API's ordering,
            # so the reported message is the same on every run
            errors = ', '.join(dict.fromkeys(
                traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query mapping for `media_id`.

        Must be overridden by subclasses that call `_call_graphql_api`.
        """
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Query a GraphQL endpoint and return the 'data' member of the
        response. Each query value is sent as compact JSON."""
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')


class TwitterCardIE(InfoExtractor):
    """Extractor for card and embedded-video URLs.

    Performs no extraction itself: the matched ID is turned into a status
    URL and delegated to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'},
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect the card URL to the status URL handled by TwitterIE."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)


460 class TwitterIE(TwitterBaseIE):
461 IE_NAME = 'twitter'
462 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
464 _TESTS = [{
465 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
466 'info_dict': {
467 'id': '643211870443208704',
468 'display_id': '643211948184596480',
469 'ext': 'mp4',
470 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
471 'thumbnail': r're:^https?://.*\.jpg',
472 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
473 'channel_id': '549749560',
474 'uploader': 'FREE THE NIPPLE',
475 'uploader_id': 'freethenipple',
476 'duration': 12.922,
477 'timestamp': 1442188653,
478 'upload_date': '20150913',
479 'uploader_url': 'https://twitter.com/freethenipple',
480 'comment_count': int,
481 'repost_count': int,
482 'like_count': int,
483 'tags': [],
484 'age_limit': 18,
485 '_old_archive_ids': ['twitter 643211948184596480'],
487 'skip': 'Requires authentication',
488 }, {
489 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
490 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
491 'info_dict': {
492 'id': '657991469417025536',
493 'ext': 'mp4',
494 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
495 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
496 'thumbnail': r're:^https?://.*\.png',
497 'uploader': 'Gifs',
498 'uploader_id': 'giphz',
500 'expected_warnings': ['height', 'width'],
501 'skip': 'Account suspended',
502 }, {
503 'url': 'https://twitter.com/starwars/status/665052190608723968',
504 'info_dict': {
505 'id': '665052190608723968',
506 'display_id': '665052190608723968',
507 'ext': 'mp4',
508 'title': r're:Star Wars.*A new beginning is coming December 18.*',
509 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
510 'channel_id': '20106852',
511 'uploader_id': 'starwars',
512 'uploader': r're:Star Wars.*',
513 'timestamp': 1447395772,
514 'upload_date': '20151113',
515 'uploader_url': 'https://twitter.com/starwars',
516 'comment_count': int,
517 'repost_count': int,
518 'like_count': int,
519 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
520 'age_limit': 0,
521 '_old_archive_ids': ['twitter 665052190608723968'],
523 }, {
524 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
525 'info_dict': {
526 'id': '705235433198714880',
527 'ext': 'mp4',
528 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
529 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
530 'uploader_id': 'BTNBrentYarina',
531 'uploader': 'Brent Yarina',
532 'timestamp': 1456976204,
533 'upload_date': '20160303',
534 'uploader_url': 'https://twitter.com/BTNBrentYarina',
535 'comment_count': int,
536 'repost_count': int,
537 'like_count': int,
538 'tags': [],
539 'age_limit': 0,
541 'params': {
542 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
543 # Test case of TwitterCardIE
544 'skip_download': True,
546 'skip': 'Dead external link',
547 }, {
548 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
549 'info_dict': {
550 'id': '700207414000242688',
551 'display_id': '700207533655363584',
552 'ext': 'mp4',
553 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
554 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
555 'thumbnail': r're:^https?://.*\.jpg',
556 'channel_id': '1383165541',
557 'uploader': 'jaydin donte geer',
558 'uploader_id': 'jaydingeer',
559 'duration': 30.0,
560 'timestamp': 1455777459,
561 'upload_date': '20160218',
562 'uploader_url': 'https://twitter.com/jaydingeer',
563 'comment_count': int,
564 'repost_count': int,
565 'like_count': int,
566 'tags': ['Damndaniel'],
567 'age_limit': 0,
568 '_old_archive_ids': ['twitter 700207533655363584'],
570 }, {
571 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
572 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
573 'info_dict': {
574 'id': 'MIOxnrUteUd',
575 'ext': 'mp4',
576 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
577 'uploader': 'TAKUMA',
578 'uploader_id': '1004126642786242560',
579 'timestamp': 1402826626,
580 'upload_date': '20140615',
581 'thumbnail': r're:^https?://.*\.jpg',
582 'alt_title': 'Vine by TAKUMA',
583 'comment_count': int,
584 'repost_count': int,
585 'like_count': int,
586 'view_count': int,
588 'add_ie': ['Vine'],
589 }, {
590 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
591 'info_dict': {
592 'id': '717462543795523584',
593 'display_id': '719944021058060289',
594 'ext': 'mp4',
595 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
596 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
597 'channel_id': '701615052',
598 'uploader_id': 'CaptainAmerica',
599 'uploader': 'Captain America',
600 'duration': 3.17,
601 'timestamp': 1460483005,
602 'upload_date': '20160412',
603 'uploader_url': 'https://twitter.com/CaptainAmerica',
604 'thumbnail': r're:^https?://.*\.jpg',
605 'comment_count': int,
606 'repost_count': int,
607 'like_count': int,
608 'tags': [],
609 'age_limit': 0,
610 '_old_archive_ids': ['twitter 719944021058060289'],
612 }, {
613 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
614 'info_dict': {
615 'id': '1zqKVVlkqLaKB',
616 'ext': 'mp4',
617 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
618 'upload_date': '20160923',
619 'uploader_id': '1PmKqpJdOJQoY',
620 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
621 'timestamp': 1474613214,
622 'thumbnail': r're:^https?://.*\.jpg',
624 'add_ie': ['Periscope'],
625 'skip': 'Broadcast not found',
626 }, {
627 # has mp4 formats via mobile API
628 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
629 'info_dict': {
630 'id': '852077943283097602',
631 'ext': 'mp4',
632 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
633 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
634 'channel_id': '2526757026',
635 'uploader': 'عالم الأخبار',
636 'uploader_id': 'news_al3alm',
637 'duration': 277.4,
638 'timestamp': 1492000653,
639 'upload_date': '20170412',
640 'display_id': '852138619213144067',
641 'age_limit': 0,
642 'uploader_url': 'https://twitter.com/news_al3alm',
643 'thumbnail': r're:^https?://.*\.jpg',
644 'tags': [],
645 'repost_count': int,
646 'like_count': int,
647 'comment_count': int,
648 '_old_archive_ids': ['twitter 852138619213144067'],
650 }, {
651 'url': 'https://twitter.com/i/web/status/910031516746514432',
652 'info_dict': {
653 'id': '910030238373089285',
654 'display_id': '910031516746514432',
655 'ext': 'mp4',
656 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
657 'thumbnail': r're:^https?://.*\.jpg',
658 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
659 'channel_id': '2319432498',
660 'uploader': 'Préfet de Guadeloupe',
661 'uploader_id': 'Prefet971',
662 'duration': 47.48,
663 'timestamp': 1505803395,
664 'upload_date': '20170919',
665 'uploader_url': 'https://twitter.com/Prefet971',
666 'comment_count': int,
667 'repost_count': int,
668 'like_count': int,
669 'tags': ['Maria'],
670 'age_limit': 0,
671 '_old_archive_ids': ['twitter 910031516746514432'],
673 'params': {
674 'skip_download': True, # requires ffmpeg
676 }, {
677 # card via api.twitter.com/1.1/videos/tweet/config
678 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
679 'info_dict': {
680 'id': '1001551417340022785',
681 'display_id': '1001551623938805763',
682 'ext': 'mp4',
683 'title': 're:.*?Shep is on a roll today.*?',
684 'thumbnail': r're:^https?://.*\.jpg',
685 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
686 'channel_id': '255036353',
687 'uploader': 'Lis Power',
688 'uploader_id': 'LisPower1',
689 'duration': 111.278,
690 'timestamp': 1527623489,
691 'upload_date': '20180529',
692 'uploader_url': 'https://twitter.com/LisPower1',
693 'comment_count': int,
694 'repost_count': int,
695 'like_count': int,
696 'tags': [],
697 'age_limit': 0,
698 '_old_archive_ids': ['twitter 1001551623938805763'],
700 'params': {
701 'skip_download': True, # requires ffmpeg
703 }, {
704 'url': 'https://twitter.com/foobar/status/1087791357756956680',
705 'info_dict': {
706 'id': '1087791272830607360',
707 'display_id': '1087791357756956680',
708 'ext': 'mp4',
709 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
710 'thumbnail': r're:^https?://.*\.jpg',
711 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
712 'uploader': 'X',
713 'uploader_id': 'X',
714 'duration': 61.567,
715 'timestamp': 1548184644,
716 'upload_date': '20190122',
717 'uploader_url': 'https://twitter.com/X',
718 'comment_count': int,
719 'repost_count': int,
720 'like_count': int,
721 'view_count': int,
722 'tags': [],
723 'age_limit': 0,
725 'skip': 'This Tweet is unavailable',
726 }, {
727 # not available in Periscope
728 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
729 'info_dict': {
730 'id': '1vOGwqejwoWxB',
731 'ext': 'mp4',
732 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
733 'uploader': 'Vivi',
734 'uploader_id': '1eVjYOLGkGrQL',
735 'thumbnail': r're:^https?://.*\.jpg',
736 'tags': ['EduTECH2019'],
737 'view_count': int,
739 'add_ie': ['TwitterBroadcast'],
740 'skip': 'Broadcast no longer exists',
741 }, {
742 # unified card
743 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
744 'info_dict': {
745 'id': '1349774757969989634',
746 'display_id': '1349794411333394432',
747 'ext': 'mp4',
748 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
749 'thumbnail': r're:^https?://.*\.jpg',
750 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
751 'channel_id': '18552281',
752 'uploader': 'Brooklyn Nets',
753 'uploader_id': 'BrooklynNets',
754 'duration': 324.484,
755 'timestamp': 1610651040,
756 'upload_date': '20210114',
757 'uploader_url': 'https://twitter.com/BrooklynNets',
758 'comment_count': int,
759 'repost_count': int,
760 'like_count': int,
761 'tags': [],
762 'age_limit': 0,
763 '_old_archive_ids': ['twitter 1349794411333394432'],
765 'params': {
766 'skip_download': True,
768 }, {
769 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
770 'info_dict': {
771 'id': '1577855447914409984',
772 'display_id': '1577855540407197696',
773 'ext': 'mp4',
774 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
775 'description': 'md5:b9c3699335447391d11753ab21c70a74',
776 'upload_date': '20221006',
777 'channel_id': '143077138',
778 'uploader': 'Oshtru',
779 'uploader_id': 'oshtru',
780 'uploader_url': 'https://twitter.com/oshtru',
781 'thumbnail': r're:^https?://.*\.jpg',
782 'duration': 30.03,
783 'timestamp': 1665025050,
784 'comment_count': int,
785 'repost_count': int,
786 'like_count': int,
787 'tags': [],
788 'age_limit': 0,
789 '_old_archive_ids': ['twitter 1577855540407197696'],
791 'params': {'skip_download': True},
792 }, {
793 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
794 'info_dict': {
795 'id': '1577719286659006464',
796 'title': 'Ultima Reload - Test',
797 'description': 'Test https://t.co/Y3KEZD7Dad',
798 'channel_id': '168922496',
799 'uploader': 'Ultima Reload',
800 'uploader_id': 'UltimaShadowX',
801 'uploader_url': 'https://twitter.com/UltimaShadowX',
802 'upload_date': '20221005',
803 'timestamp': 1664992565,
804 'comment_count': int,
805 'repost_count': int,
806 'like_count': int,
807 'tags': [],
808 'age_limit': 0,
810 'playlist_count': 4,
811 'params': {'skip_download': True},
812 }, {
813 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
814 'info_dict': {
815 'id': '1575559336759263233',
816 'display_id': '1575560063510810624',
817 'ext': 'mp4',
818 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
819 'thumbnail': r're:^https?://.*\.jpg',
820 'description': 'md5:95aea692fda36a12081b9629b02daa92',
821 'channel_id': '1094109584',
822 'uploader': 'Max Olson',
823 'uploader_id': 'MesoMax919',
824 'uploader_url': 'https://twitter.com/MesoMax919',
825 'duration': 21.321,
826 'timestamp': 1664477766,
827 'upload_date': '20220929',
828 'comment_count': int,
829 'repost_count': int,
830 'like_count': int,
831 'tags': ['HurricaneIan'],
832 'age_limit': 0,
833 '_old_archive_ids': ['twitter 1575560063510810624'],
835 }, {
836 # Adult content, fails if not logged in
837 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
838 'info_dict': {
839 'id': '1575199163847000068',
840 'display_id': '1575199173472927762',
841 'ext': 'mp4',
842 'title': str,
843 'description': str,
844 'channel_id': '1217167793541480450',
845 'uploader': str,
846 'uploader_id': 'Rizdraws',
847 'uploader_url': 'https://twitter.com/Rizdraws',
848 'upload_date': '20220928',
849 'timestamp': 1664391723,
850 'thumbnail': r're:^https?://.+\.jpg',
851 'like_count': int,
852 'repost_count': int,
853 'comment_count': int,
854 'age_limit': 18,
855 'tags': [],
856 '_old_archive_ids': ['twitter 1575199173472927762'],
858 'params': {'skip_download': 'The media could not be played'},
859 'skip': 'Requires authentication',
860 }, {
861 # Playlist result only with graphql API
862 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
863 'playlist_mincount': 2,
864 'info_dict': {
865 'id': '1395079556562706435',
866 'title': str,
867 'tags': [],
868 'channel_id': '21539378',
869 'uploader': str,
870 'like_count': int,
871 'upload_date': '20210519',
872 'age_limit': 0,
873 'repost_count': int,
874 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
875 'uploader_id': 'Srirachachau',
876 'comment_count': int,
877 'uploader_url': 'https://twitter.com/Srirachachau',
878 'timestamp': 1621447860,
880 }, {
881 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
882 'playlist_mincount': 2,
883 'info_dict': {
884 'id': '1578353380363501568',
885 'title': str,
886 'channel_id': '2195866214',
887 'uploader_id': 'DavidToons_',
888 'repost_count': int,
889 'like_count': int,
890 'uploader': str,
891 'timestamp': 1665143744,
892 'uploader_url': 'https://twitter.com/DavidToons_',
893 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
894 'tags': [],
895 'comment_count': int,
896 'upload_date': '20221007',
897 'age_limit': 0,
899 }, {
900 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
901 'playlist_count': 2,
902 'info_dict': {
903 'id': '1578401165338976258',
904 'title': str,
905 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
906 'channel_id': '19338359',
907 'uploader': str,
908 'uploader_id': 'primevideouk',
909 'timestamp': 1665155137,
910 'upload_date': '20221007',
911 'age_limit': 0,
912 'uploader_url': 'https://twitter.com/primevideouk',
913 'comment_count': int,
914 'repost_count': int,
915 'like_count': int,
916 'tags': ['TheRingsOfPower'],
918 }, {
919 # Twitter Spaces
920 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
921 'info_dict': {
922 'id': '1lPJqmBeeNAJb',
923 'ext': 'm4a',
924 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
925 'uploader': r're:Monique Camarra.+?',
926 'uploader_id': 'MoniqueCamarra',
927 'live_status': 'was_live',
928 'release_timestamp': 1658417414,
929 'description': r're:Twitter Space participated by Sergej Sumlenny.+',
930 'timestamp': 1658407771,
931 'release_date': '20220721',
932 'upload_date': '20220721',
934 'add_ie': ['TwitterSpaces'],
935 'params': {'skip_download': 'm3u8'},
936 }, {
937 # URL specifies video number but --yes-playlist
938 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
939 'playlist_mincount': 2,
940 'info_dict': {
941 'id': '1600649710662213632',
942 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
943 'timestamp': 1670459604.0,
944 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
945 'comment_count': int,
946 'uploader_id': 'CTVJLaidlaw',
947 'channel_id': '80082014',
948 'repost_count': int,
949 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
950 'upload_date': '20221208',
951 'age_limit': 0,
952 'uploader': 'Jocelyn Laidlaw',
953 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
954 'like_count': int,
956 }, {
957 # URL specifies video number and --no-playlist
958 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
959 'info_dict': {
960 'id': '1600649511827013632',
961 'ext': 'mp4',
962 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
963 'thumbnail': r're:^https?://.+\.jpg',
964 'timestamp': 1670459604.0,
965 'channel_id': '80082014',
966 'uploader_id': 'CTVJLaidlaw',
967 'uploader': 'Jocelyn Laidlaw',
968 'repost_count': int,
969 'comment_count': int,
970 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
971 'duration': 102.226,
972 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
973 'display_id': '1600649710662213632',
974 'like_count': int,
975 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
976 'upload_date': '20221208',
977 'age_limit': 0,
978 '_old_archive_ids': ['twitter 1600649710662213632'],
980 'params': {'noplaylist': True},
981 }, {
982 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
983 # note the id different between extraction and url
984 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
985 'info_dict': {
986 'id': '1621117577354424321',
987 'display_id': '1621117700482416640',
988 'ext': 'mp4',
989 'title': '뽀 - 아 최우제 이동속도 봐',
990 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
991 'duration': 24.598,
992 'channel_id': '1281839411068432384',
993 'uploader': '뽀',
994 'uploader_id': 's2FAKER',
995 'uploader_url': 'https://twitter.com/s2FAKER',
996 'upload_date': '20230202',
997 'timestamp': 1675339553.0,
998 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
999 'age_limit': 18,
1000 'tags': [],
1001 'like_count': int,
1002 'repost_count': int,
1003 'comment_count': int,
1004 '_old_archive_ids': ['twitter 1621117700482416640'],
1006 'skip': 'Requires authentication',
1007 }, {
1008 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1009 'info_dict': {
1010 'id': '1599108643743473680',
1011 'display_id': '1599108751385972737',
1012 'ext': 'mp4',
1013 'title': '\u06ea - \U0001F48B',
1014 'channel_id': '1347791436809441283',
1015 'uploader_url': 'https://twitter.com/hlo_again',
1016 'like_count': int,
1017 'uploader_id': 'hlo_again',
1018 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1019 'repost_count': int,
1020 'duration': 9.531,
1021 'comment_count': int,
1022 'upload_date': '20221203',
1023 'age_limit': 0,
1024 'timestamp': 1670092210.0,
1025 'tags': [],
1026 'uploader': '\u06ea',
1027 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1028 '_old_archive_ids': ['twitter 1599108751385972737'],
1030 'params': {'noplaylist': True},
1031 }, {
1032 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1033 'info_dict': {
1034 'id': '1600009362759733248',
1035 'display_id': '1600009574919962625',
1036 'ext': 'mp4',
1037 'channel_id': '211814412',
1038 'uploader_url': 'https://twitter.com/MunTheShinobi',
1039 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1040 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1041 'age_limit': 0,
1042 'uploader': 'Mün',
1043 'repost_count': int,
1044 'upload_date': '20221206',
1045 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1046 'comment_count': int,
1047 'like_count': int,
1048 'tags': [],
1049 'uploader_id': 'MunTheShinobi',
1050 'duration': 139.987,
1051 'timestamp': 1670306984.0,
1052 '_old_archive_ids': ['twitter 1600009574919962625'],
1054 }, {
1055 # retweeted_status (private)
1056 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1057 'info_dict': {
1058 'id': '1623274794488659969',
1059 'display_id': '1623739803874349067',
1060 'ext': 'mp4',
1061 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1062 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1063 'uploader': 'Johnny Bullets',
1064 'uploader_id': 'Johnnybull3ts',
1065 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1066 'age_limit': 0,
1067 'tags': [],
1068 'duration': 8.033,
1069 'timestamp': 1675853859.0,
1070 'upload_date': '20230208',
1071 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1072 'like_count': int,
1073 'repost_count': int,
1075 'skip': 'Protected tweet',
1076 }, {
1077 # retweeted_status
1078 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1079 'info_dict': {
1080 'id': '1694928337846538240',
1081 'ext': 'mp4',
1082 'display_id': '1695424220702888009',
1083 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1084 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1085 'channel_id': '15212187',
1086 'uploader': 'Benny Johnson',
1087 'uploader_id': 'bennyjohnson',
1088 'uploader_url': 'https://twitter.com/bennyjohnson',
1089 'age_limit': 0,
1090 'tags': [],
1091 'duration': 45.001,
1092 'timestamp': 1692962814.0,
1093 'upload_date': '20230825',
1094 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1095 'like_count': int,
1096 'repost_count': int,
1097 'comment_count': int,
1098 '_old_archive_ids': ['twitter 1695424220702888009'],
1100 }, {
1101 # retweeted_status w/ legacy API
1102 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1103 'info_dict': {
1104 'id': '1694928337846538240',
1105 'ext': 'mp4',
1106 'display_id': '1695424220702888009',
1107 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1108 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1109 'channel_id': '15212187',
1110 'uploader': 'Benny Johnson',
1111 'uploader_id': 'bennyjohnson',
1112 'uploader_url': 'https://twitter.com/bennyjohnson',
1113 'age_limit': 0,
1114 'tags': [],
1115 'duration': 45.001,
1116 'timestamp': 1692962814.0,
1117 'upload_date': '20230825',
1118 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1119 'like_count': int,
1120 'repost_count': int,
1121 '_old_archive_ids': ['twitter 1695424220702888009'],
1123 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1124 }, {
1125 # Broadcast embedded in tweet
1126 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1127 'info_dict': {
1128 'id': '1rmxPMjLzAXKN',
1129 'ext': 'mp4',
1130 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1131 'uploader': 'Jessica Dobson',
1132 'uploader_id': 'JessicaDobsonWX',
1133 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1134 'timestamp': 1701566398,
1135 'upload_date': '20231203',
1136 'live_status': 'was_live',
1137 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1138 'concurrent_view_count': int,
1139 'view_count': int,
1141 'add_ie': ['TwitterBroadcast'],
1142 }, {
1143 # Animated gif and quote tweet video
1144 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1145 'playlist_mincount': 2,
1146 'info_dict': {
1147 'id': '1696256659889565950',
1148 'title': 'BAKOON - https://t.co/zom968d0a0',
1149 'description': 'https://t.co/zom968d0a0',
1150 'tags': [],
1151 'channel_id': '1263540390',
1152 'uploader': 'BAKOON',
1153 'uploader_id': 'BAKKOOONN',
1154 'uploader_url': 'https://twitter.com/BAKKOOONN',
1155 'age_limit': 18,
1156 'timestamp': 1693254077.0,
1157 'upload_date': '20230828',
1158 'like_count': int,
1159 'comment_count': int,
1160 'repost_count': int,
1162 'skip': 'Requires authentication',
1163 }, {
1164 # "stale tweet" with typename "TweetWithVisibilityResults"
1165 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1166 'md5': '511377ff8dfa7545307084dca4dce319',
1167 'info_dict': {
1168 'id': '1724883339285544960',
1169 'ext': 'mp4',
1170 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1171 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1172 'display_id': '1724884212803834154',
1173 'channel_id': '337808606',
1174 'uploader': 'Robert F. Kennedy Jr',
1175 'uploader_id': 'RobertKennedyJr',
1176 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1177 'upload_date': '20231115',
1178 'timestamp': 1700079417.0,
1179 'duration': 341.048,
1180 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1181 'tags': ['Kennedy24'],
1182 'repost_count': int,
1183 'like_count': int,
1184 'comment_count': int,
1185 'age_limit': 0,
1186 '_old_archive_ids': ['twitter 1724884212803834154'],
1188 }, {
1189 # x.com
1190 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1191 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1192 'info_dict': {
1193 'id': '1790637589910654976',
1194 'ext': 'mp4',
1195 'title': 'Historic Vids - One of the most intense moments in history',
1196 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1197 'display_id': '1790637656616943991',
1198 'uploader': 'Historic Vids',
1199 'uploader_id': 'historyinmemes',
1200 'uploader_url': 'https://twitter.com/historyinmemes',
1201 'channel_id': '855481986290524160',
1202 'upload_date': '20240515',
1203 'timestamp': 1715756260.0,
1204 'duration': 15.488,
1205 'tags': [],
1206 'comment_count': int,
1207 'repost_count': int,
1208 'like_count': int,
1209 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1210 'age_limit': 0,
1211 '_old_archive_ids': ['twitter 1790637656616943991'],
1213 }, {
1214 # onion route
1215 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1216 'only_matching': True,
1217 }, {
1218 # Twitch Clip Embed
1219 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1220 'only_matching': True,
1221 }, {
1222 # promo_video_website card
1223 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1224 'only_matching': True,
1225 }, {
1226 # promo_video_convo card
1227 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1228 'only_matching': True,
1229 }, {
1230 # appplayer card
1231 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1232 'only_matching': True,
1233 }, {
1234 # video_direct_message card
1235 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1236 'only_matching': True,
1237 }, {
1238 # poll2choice_video card
1239 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1240 'only_matching': True,
1241 }, {
1242 # poll3choice_video card
1243 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1244 'only_matching': True,
1245 }, {
1246 # poll4choice_video card
1247 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1248 'only_matching': True,
# Captures the run of digits that follows a `_video/` path segment in a media URL;
# the syndication code path uses group 1 as the media's `id_str`
_MEDIA_ID_RE = re.compile(pattern=r'_video/(\d+)/')
@property
def _GRAPHQL_ENDPOINT(self):
    """Return the GraphQL endpoint path used to look up a tweet.

    The `TweetDetail` endpoint is chosen when `self.is_logged_in` is truthy,
    otherwise the `TweetResultByRestId` endpoint is returned.
    """
    return (
        'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' if self.is_logged_in
        else '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId')
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into the legacy status layout.

    data: the GraphQL response payload to read the tweet result from.
    twid: the tweet id; used to pick the matching timeline entry when logged in
        and as the id passed along with warnings.

    Returns the tweet's `legacy` dict, updated in place with `user`, `card`,
    `quoted_status` and `retweeted_status` entries taken from the result.
    Raises ExtractorError (or whatever `raise_login_required` raises) when the
    result is a tombstone or is reported as unavailable.
    """
    if self.is_logged_in:
        # Pick the timeline entry for this tweet; prefer its nested 'tweet' object when there is one
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    extra_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweeted_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweeted_user or {}

    # Turn the card's list of {'key': ..., 'value': ...} items into a key -> value mapping
    binding_values = {
        item.get('key'): item.get('value')
        for item in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for the tweet `media_id`.

    Two different payloads exist: one keyed on `focalTweetId` that is used when
    `self.is_logged_in` is truthy, and one keyed on `tweetId` (with an extra
    `fieldToggles` section) that is used otherwise.
    """
    if not self.is_logged_in:
        guest_variables = {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        }
        guest_features = {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        }
        return {
            'variables': guest_variables,
            'features': guest_features,
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    variables = {
        'focalTweetId': media_id,
        'includePromotedContent': True,
        'with_rux_injections': False,
        'withBirdwatchNotes': True,
        'withCommunity': True,
        'withDownvotePerspective': False,
        'withQuickPromoteEligibilityTweetFields': True,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withSuperFollowsTweetFields': True,
        'withSuperFollowsUserFields': True,
        'withV2Timeline': True,
        'withVoice': True,
    }
    features = {
        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_graphql_timeline_navigation_enabled': False,
        'responsive_web_text_conversations_enabled': False,
        'responsive_web_uc_gql_enabled': True,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'tweetypie_unmention_optimization_enabled': True,
        'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
        'verified_phone_label_enabled': False,
        'vibe_api_enabled': True,
    }
    return {
        'variables': variables,
        'features': features,
    }
def _call_syndication_api(self, twid):
    """Download the tweet `twid` from the syndication endpoint.

    The response is adjusted so that its media list is reachable under
    `extended_entities` -> `media`, the same place the other code paths read it
    from. Raises ExtractorError when the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    for detail in media_details:
        # Use the id found in the first matching variant URL, falling back to the tweet id
        variant_media_id = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
        detail['id_str'] = variant_media_id or twid
    status['extended_entities'] = {'media': list(media_details)}

    return status
def _extract_status(self, twid):
    """Fetch the status (tweet) dict for `twid` using the selected API.

    `self._selected_api` must be one of 'graphql', 'legacy' or 'syndication';
    anything else raises an expected ExtractorError. A logged-in session uses
    GraphQL unless syndication was explicitly selected. If the GraphQL or
    legacy request fails with HTTP 429, the syndication endpoint is used as a
    fallback; any other ExtractorError is re-raised.

    Returns the `retweeted_status` dict when the status has one, otherwise the
    status itself, or an empty dict if neither is a dict.
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    if self._selected_api == 'syndication':
        # The syndication result always ends up being the one used for this selection,
        # so go straight to it: querying GraphQL first for a logged-in session would
        # waste a request and could raise before syndication is even tried
        status = self._call_syndication_api(twid)
    else:
        try:
            if self.is_logged_in or self._selected_api == 'graphql':
                status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
            else:  # 'legacy' is the only selection left after the validation above
                status = self._call_api(f'statuses/show/{twid}.json', twid, {
                    'cards_platform': 'Web-12',
                    'include_cards': 1,
                    'include_reply_count': 1,
                    'include_user_entities': 0,
                    'tweet_mode': 'extended',
                })
        except ExtractorError as e:
            # Only a rate-limit response is recoverable here
            if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
                raise
            self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
            status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet at `url`.

    The tweet id and the optional media number are taken from the URL. Media
    attached to the tweet (and to its quoted tweet) and media described by the
    tweet's card are both considered.

    Returns a single info dict when one video is selected or found, a playlist
    result when several are found, or a URL result pointing at the tweet's first
    expanded URL when the tweet carries no video of its own.
    Raises ExtractorError when the URL-specified media number does not refer to
    an available video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Tweet-level metadata shared by every entry produced below
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-video fields (formats, subtitles, thumbnails, ...) from a media dict."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })

            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card; yields nothing when there is no card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is read from the '<type>_value' key named by its own 'type' field
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        # Only the part after the last ':' identifies the card kind
        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # All non-photo media of the tweet itself followed by those of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        media_index = int(selected_index) - 1
        # Media numbers are 1-based. A number of 0 would become index -1 and silently
        # select the last media item, so treat it as unavailable instead
        desired_obj = None
        if media_index >= 0:
            desired_obj = traverse_obj(status, (
                (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media in the tweet itself: hand over to whatever its first link points at
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Amplify videos hosted at amp.twimg.com/v/<id>."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the Amplify page and build the info dict from its `twitter:*` meta tags.

        Formats come from the VMAP URL in the `twitter:amplify:vmap` meta tag;
        the thumbnail and the player dimensions are read from further meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Reads the `twitter:<target>:width` / `twitter:<target>:height` meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Only the first format is annotated with the player dimensions. Guard against an
        # empty list so that a VMAP without formats does not fail with a bare IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for broadcasts at <twitter base URL>/i/broadcasts/<13-character id>."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the broadcast and, unless it is still upcoming, attach its m3u8 formats.

        Raises an expected ExtractorError when the API returns no data for the
        broadcast id.
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast itself take precedence over the parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's `type` query parameter (if any) is used as the m3u8 format id
        url_query = urllib.parse.urlparse(m3u8_url).query
        m3u8_id = urllib.parse.parse_qs(url_query).get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for Twitter/X Spaces, addressed by a 13-character alphanumeric ID.
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'skip': 'No longer available',
    }, {
        # State is post_live/TimedOut, yet the Space can still be downloaded
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
            'protocol': 'm3u8',  # ffmpeg is forced
            'container': 'm4a_dash',  # audio-only format fixup is applied
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Requires ffmpeg as the downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
            'protocol': 'm3u8',  # ffmpeg is forced
            'container': 'm4a_dash',  # audio-only format fixup is applied
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Video Space
        'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
        'info_dict': {
            'id': '1DXGydznBYWKM',
            'ext': 'mp4',
            'title': 'America and Israel’s “special relationship”',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': 'Candace Owens',
            'uploader_id': 'RealCandaceO',
            'live_status': 'was_live',
            'timestamp': 1723931351,
            'upload_date': '20240817',
            'release_timestamp': 1723932000,
            'release_date': '20240817',
            'protocol': 'm3u8_native',  # not ffmpeg, detected as video space
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased `metadata.state` value from the API -> `live_status` value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` payload describing the Space `space_id`.

        NOTE(review): presumably consumed by `_call_graphql_api`, which is
        defined outside this class -- confirm against the base class.
        """
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _extract_space_formats(self, media_key, is_live, headers):
        """Return the HLS formats for the stream identified by `media_key`.

        The playback URL is read from the `live_video_stream/status` endpoint
        (`noRedirectPlaybackUrl` first, then `location`). An empty list is
        returned when neither holds a valid URL.
        """
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = traverse_obj(
            stream_status,
            ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
        if not source:
            return []

        is_audio_space = 'audio-space' in source
        if is_audio_space:
            # XXX: Some audio-only Spaces need ffmpeg as downloader
            ext, entry_protocol = 'm4a', 'm3u8'
        else:
            ext, entry_protocol = 'mp4', 'm3u8_native'

        formats = self._extract_m3u8_formats(
            source, media_key, ext, entry_protocol=entry_protocol,
            live=is_live, headers=headers, fatal=False)

        if is_audio_space:
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)
        return formats

    def _real_extract(self, url):
        """Look up the Space referenced by `url` and return its info dict.

        Raises ExtractorError when the API response carries no `audioSpace`
        data. Spaces that have not started, have no replay, or are not
        downloadable yet are reported through `raise_no_formats`.
        """
        space_id = self._match_id(url)
        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # An unknown or missing state leaves live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'
        headers = {'Referer': 'https://twitter.com/'}

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            formats = self._extract_space_formats(metadata['media_key'], is_live, headers)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # The values are divided by 1000 (`scale=1000`)
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (and the internal `tco:<code>` form) to their target URL.
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL on via `url_result`."""
        match = self._match_valid_url(url)
        shortcode = match.group('id')
        external_id = match.group('eid')
        if external_id:
            # `tco:<code>` input: rebuild the real t.co URL from the code
            shortcode = external_id
            url = f'{self._BASE_URL}{shortcode}'

        # NOTE(review): the curl User-Agent presumably makes t.co answer with a
        # plain HTTP redirect -- confirm
        response = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'})
        resolved_url = response.url

        # Twitter's interstitial warning page carries the target in `unsafe_link`
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if not resolved_url.startswith(unsafe_link_prefix):
            return self.url_result(resolved_url)
        return self.url_result(resolved_url.replace(unsafe_link_prefix, ''))