yt_dlp/extractor/twitter.py

   1 import functools
   2 import json
   3 import random
   4 import re
   5 import urllib.parse
   6
   7 from .common import InfoExtractor
   8 from .periscope import PeriscopeBaseIE, PeriscopeIE
   9 from ..networking.exceptions import HTTPError
  10 from ..utils import (
  11     ExtractorError,
  12     dict_get,
  13     filter_dict,
  14     float_or_none,
  15     format_field,
  16     int_or_none,
  17     join_nonempty,
  18     make_archive_id,
  19     remove_end,
  20     str_or_none,
  21     strip_or_none,
  22     traverse_obj,
  23     try_call,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     xpath_text,
  29 )
  30
  31
  32 class TwitterBaseIE(InfoExtractor):
  33     _NETRC_MACHINE = 'twitter'
  34     _API_BASE = 'https://api.x.com/1.1/'
  35     _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
  36     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  37     _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
  38     _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
  39     _flow_token = None
  40
  41     _LOGIN_INIT_DATA = json.dumps({
  42         'input_flow_data': {
  43             'flow_context': {
  44                 'debug_overrides': {},
  45                 'start_location': {
  46                     'location': 'unknown',
  47                 },
  48             },
  49         },
  50         'subtask_versions': {
  51             'action_list': 2,
  52             'alert_dialog': 1,
  53             'app_download_cta': 1,
  54             'check_logged_in_account': 1,
  55             'choice_selection': 3,
  56             'contacts_live_sync_permission_prompt': 0,
  57             'cta': 7,
  58             'email_verification': 2,
  59             'end_flow': 1,
  60             'enter_date': 1,
  61             'enter_email': 2,
  62             'enter_password': 5,
  63             'enter_phone': 2,
  64             'enter_recaptcha': 1,
  65             'enter_text': 5,
  66             'enter_username': 2,
  67             'generic_urt': 3,
  68             'in_app_notification': 1,
  69             'interest_picker': 3,
  70             'js_instrumentation': 1,
  71             'menu_dialog': 1,
  72             'notifications_permission_prompt': 2,
  73             'open_account': 2,
  74             'open_home_timeline': 1,
  75             'open_link': 1,
  76             'phone_verification': 4,
  77             'privacy_options': 1,
  78             'security_key': 3,
  79             'select_avatar': 4,
  80             'select_banner': 2,
  81             'settings_list': 7,
  82             'show_code': 1,
  83             'sign_up': 2,
  84             'sign_up_review': 4,
  85             'tweet_selection_urt': 1,
  86             'update_users': 1,
  87             'upload_media': 1,
  88             'user_recommendations_list': 4,
  89             'user_recommendations_urt': 1,
  90             'wait_spinner': 3,
  91             'web_modal': 1,
  92         },
  93     }, separators=(',', ':')).encode()
  94
  95     def _extract_variant_formats(self, variant, video_id):
  96         variant_url = variant.get('url')
  97         if not variant_url:
  98             return [], {}
  99         elif '.m3u8' in variant_url:
 100             fmts, subs = self._extract_m3u8_formats_and_subtitles(
 101                 variant_url, video_id, 'mp4', 'm3u8_native',
 102                 m3u8_id='hls', fatal=False)
 103             for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
 104                 if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
 105                     f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
 106             return fmts, subs
 107         else:
 108             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
 109             f = {
 110                 'url': variant_url,
 111                 'format_id': join_nonempty('http', tbr),
 112                 'tbr': tbr,
 113             }
 114             self._search_dimensions_in_video_url(f, variant_url)
 115             return [f], {}
 116
 117     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
 118         vmap_url = url_or_none(vmap_url)
 119         if not vmap_url:
 120             return [], {}
 121         vmap_data = self._download_xml(vmap_url, video_id)
 122         formats = []
 123         subtitles = {}
 124         urls = []
 125         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
 126             video_variant.attrib['url'] = urllib.parse.unquote(
 127                 video_variant.attrib['url'])
 128             urls.append(video_variant.attrib['url'])
 129             fmts, subs = self._extract_variant_formats(
 130                 video_variant.attrib, video_id)
 131             formats.extend(fmts)
 132             subtitles = self._merge_subtitles(subtitles, subs)
 133         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
 134         if video_url not in urls:
 135             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
 136             formats.extend(fmts)
 137             subtitles = self._merge_subtitles(subtitles, subs)
 138         return formats, subtitles
 139
 140     @staticmethod
 141     def _search_dimensions_in_video_url(a_format, video_url):
 142         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
 143         if m:
 144             a_format.update({
 145                 'width': int(m.group('width')),
 146                 'height': int(m.group('height')),
 147             })
 148
 149     @property
 150     def is_logged_in(self):
 151         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
 152
 153     # XXX: Temporary workaround until twitter.com => x.com migration is completed
 154     def _real_initialize(self):
 155         if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
 156             return
 157         # User has not yet been migrated to x.com and has passed twitter.com cookies
 158         TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
 159         TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
 160
 161     @functools.cached_property
 162     def _selected_api(self):
 163         return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
 164
 165     def _fetch_guest_token(self, display_id):
 166         guest_token = traverse_obj(self._download_json(
 167             f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
 168             headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
 169             ('guest_token', {str}))
 170         if not guest_token:
 171             raise ExtractorError('Could not retrieve guest token')
 172         return guest_token
 173
 174     def _set_base_headers(self, legacy=False):
 175         bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
 176         return filter_dict({
 177             'Authorization': f'Bearer {bearer_token}',
 178             'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
 179         })
 180
 181     def _call_login_api(self, note, headers, query={}, data=None):
 182         response = self._download_json(
 183             f'{self._API_BASE}onboarding/task.json', None, note,
 184             headers=headers, query=query, data=data, expected_status=400)
 185         error = traverse_obj(response, ('errors', 0, 'message', {str}))
 186         if error:
 187             raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
 188         elif traverse_obj(response, 'status') != 'success':
 189             raise ExtractorError('Login was unsuccessful')
 190
 191         subtask = traverse_obj(
 192             response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
 193         if not subtask:
 194             raise ExtractorError('Twitter API did not return next login subtask')
 195
 196         self._flow_token = response['flow_token']
 197
 198         return subtask
 199
 200     def _perform_login(self, username, password):
 201         if self.is_logged_in:
 202             return
 203
 204         guest_token = self._fetch_guest_token(None)
 205         headers = {
 206             **self._set_base_headers(),
 207             'content-type': 'application/json',
 208             'x-guest-token': guest_token,
 209             'x-twitter-client-language': 'en',
 210             'x-twitter-active-user': 'yes',
 211             'Referer': 'https://x.com/',
 212             'Origin': 'https://x.com',
 213         }
 214
 215         def build_login_json(*subtask_inputs):
 216             return json.dumps({
 217                 'flow_token': self._flow_token,
 218                 'subtask_inputs': subtask_inputs,
 219             }, separators=(',', ':')).encode()
 220
 221         def input_dict(subtask_id, text):
 222             return {
 223                 'subtask_id': subtask_id,
 224                 'enter_text': {
 225                     'text': text,
 226                     'link': 'next_link',
 227                 },
 228             }
 229
 230         next_subtask = self._call_login_api(
 231             'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
 232
 233         while not self.is_logged_in:
 234             if next_subtask == 'LoginJsInstrumentationSubtask':
 235                 next_subtask = self._call_login_api(
 236                     'Submitting JS instrumentation response', headers, data=build_login_json({
 237                         'subtask_id': next_subtask,
 238                         'js_instrumentation': {
 239                             'response': '{}',
 240                             'link': 'next_link',
 241                         },
 242                     }))
 243
 244             elif next_subtask == 'LoginEnterUserIdentifierSSO':
 245                 next_subtask = self._call_login_api(
 246                     'Submitting username', headers, data=build_login_json({
 247                         'subtask_id': next_subtask,
 248                         'settings_list': {
 249                             'setting_responses': [{
 250                                 'key': 'user_identifier',
 251                                 'response_data': {
 252                                     'text_data': {
 253                                         'result': username,
 254                                     },
 255                                 },
 256                             }],
 257                             'link': 'next_link',
 258                         },
 259                     }))
 260
 261             elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
 262                 next_subtask = self._call_login_api(
 263                     'Submitting alternate identifier', headers,
 264                     data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
 265                         'one of username, phone number or email that was not used as --username'))))
 266
 267             elif next_subtask == 'LoginEnterPassword':
 268                 next_subtask = self._call_login_api(
 269                     'Submitting password', headers, data=build_login_json({
 270                         'subtask_id': next_subtask,
 271                         'enter_password': {
 272                             'password': password,
 273                             'link': 'next_link',
 274                         },
 275                     }))
 276
 277             elif next_subtask == 'AccountDuplicationCheck':
 278                 next_subtask = self._call_login_api(
 279                     'Submitting account duplication check', headers, data=build_login_json({
 280                         'subtask_id': next_subtask,
 281                         'check_logged_in_account': {
 282                             'link': 'AccountDuplicationCheck_false',
 283                         },
 284                     }))
 285
 286             elif next_subtask == 'LoginTwoFactorAuthChallenge':
 287                 next_subtask = self._call_login_api(
 288                     'Submitting 2FA token', headers, data=build_login_json(input_dict(
 289                         next_subtask, self._get_tfa_info('two-factor authentication token'))))
 290
 291             elif next_subtask == 'LoginAcid':
 292                 next_subtask = self._call_login_api(
 293                     'Submitting confirmation code', headers, data=build_login_json(input_dict(
 294                         next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
 295
 296             elif next_subtask == 'ArkoseLogin':
 297                 self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
 298
 299             elif next_subtask == 'DenyLoginSubtask':
 300                 self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
 301
 302             elif next_subtask == 'LoginSuccessSubtask':
 303                 raise ExtractorError('Twitter API did not grant auth token cookie')
 304
 305             else:
 306                 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
 307
 308         self.report_login()
 309
 310     def _call_api(self, path, video_id, query={}, graphql=False):
 311         headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
 312         headers.update({
 313             'x-twitter-auth-type': 'OAuth2Session',
 314             'x-twitter-client-language': 'en',
 315             'x-twitter-active-user': 'yes',
 316         } if self.is_logged_in else {
 317             'x-guest-token': self._fetch_guest_token(video_id),
 318         })
 319         allowed_status = {400, 401, 403, 404} if graphql else {403}
 320         result = self._download_json(
 321             (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 322             video_id, headers=headers, query=query, expected_status=allowed_status,
 323             note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 324
 325         if result.get('errors'):
 326             errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 327             if errors and 'not authorized' in errors:
 328                 self.raise_login_required(remove_end(errors, '.'))
 329             raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')
 330
 331         return result
 332
 333     def _build_graphql_query(self, media_id):
 334         raise NotImplementedError('Method must be implemented to support GraphQL')
 335
 336     def _call_graphql_api(self, endpoint, media_id):
 337         data = self._build_graphql_query(media_id)
 338         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 339         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 340
 341
 342 class TwitterCardIE(InfoExtractor):
 343     IE_NAME = 'twitter:card'
 344     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 345     _TESTS = [
 346         {
 347             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 348             # MD5 checksums are different in different places
 349             'info_dict': {
 350                 'id': '560070131976392705',
 351                 'ext': 'mp4',
 352                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 353                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 354                 'uploader': 'Twitter',
 355                 'uploader_id': 'Twitter',
 356                 'thumbnail': r're:^https?://.*\.jpg',
 357                 'duration': 30.033,
 358                 'timestamp': 1422366112,
 359                 'upload_date': '20150127',
 360                 'age_limit': 0,
 361                 'comment_count': int,
 362                 'tags': [],
 363                 'repost_count': int,
 364                 'like_count': int,
 365                 'display_id': '560070183650213889',
 366                 'uploader_url': 'https://twitter.com/Twitter',
 367             },
 368         },
 369         {
 370             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 371             'md5': '7137eca597f72b9abbe61e5ae0161399',
 372             'info_dict': {
 373                 'id': '623160978427936768',
 374                 'ext': 'mp4',
 375                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 376                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 377                 'uploader': 'NASA',
 378                 'uploader_id': 'NASA',
 379                 'timestamp': 1437408129,
 380                 'upload_date': '20150720',
 381                 'uploader_url': 'https://twitter.com/NASA',
 382                 'age_limit': 0,
 383                 'comment_count': int,
 384                 'like_count': int,
 385                 'repost_count': int,
 386                 'tags': ['PlutoFlyby'],
 387             },
 388             'params': {'format': '[protocol=https]'},
 389         },
 390         {
 391             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 392             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 393             'info_dict': {
 394                 'id': 'dq4Oj5quskI',
 395                 'ext': 'mp4',
 396                 'title': 'Ubuntu 11.10 Overview',
 397                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 398                 'upload_date': '20111013',
 399                 'uploader': 'OMG! UBUNTU!',
 400                 'uploader_id': 'omgubuntu',
 401                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 402                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 403                 'channel_follower_count': int,
 404                 'chapters': 'count:8',
 405                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 406                 'duration': 138,
 407                 'categories': ['Film & Animation'],
 408                 'age_limit': 0,
 409                 'comment_count': int,
 410                 'availability': 'public',
 411                 'like_count': int,
 412                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 413                 'view_count': int,
 414                 'tags': 'count:12',
 415                 'channel': 'OMG! UBUNTU!',
 416                 'playable_in_embed': True,
 417             },
 418             'add_ie': ['Youtube'],
 419         },
 420         {
 421             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 422             'info_dict': {
 423                 'id': 'iBb2x00UVlv',
 424                 'ext': 'mp4',
 425                 'upload_date': '20151113',
 426                 'uploader_id': '1189339351084113920',
 427                 'uploader': 'ArsenalTerje',
 428                 'title': 'Vine by ArsenalTerje',
 429                 'timestamp': 1447451307,
 430                 'alt_title': 'Vine by ArsenalTerje',
 431                 'comment_count': int,
 432                 'like_count': int,
 433                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 434                 'view_count': int,
 435                 'repost_count': int,
 436             },
 437             'add_ie': ['Vine'],
 438             'params': {'skip_download': 'm3u8'},
 439         },
 440         {
 441             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 442             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 443             'info_dict': {
 444                 'id': '705235433198714880',
 445                 'ext': 'mp4',
 446                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 447                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 448                 'uploader': 'Brent Yarina',
 449                 'uploader_id': 'BTNBrentYarina',
 450                 'timestamp': 1456976204,
 451                 'upload_date': '20160303',
 452             },
 453             'skip': 'This content is no longer available.',
 454         },
 455         {
 456             'url': 'https://twitter.com/i/videos/752274308186120192',
 457             'only_matching': True,
 458         },
 459     ]
 460
 461     def _real_extract(self, url):
 462         status_id = self._match_id(url)
 463         return self.url_result(
 464             'https://twitter.com/statuses/' + status_id,
 465             TwitterIE.ie_key(), status_id)
 466
 467
 468 class TwitterIE(TwitterBaseIE):
 469     IE_NAME = 'twitter'
 470     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 471
 472     _TESTS = [{
 473         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 474         'info_dict': {
 475             'id': '643211870443208704',
 476             'display_id': '643211948184596480',
 477             'ext': 'mp4',
 478             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 479             'thumbnail': r're:^https?://.*\.jpg',
 480             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 481             'channel_id': '549749560',
 482             'uploader': 'FREE THE NIPPLE',
 483             'uploader_id': 'freethenipple',
 484             'duration': 12.922,
 485             'timestamp': 1442188653,
 486             'upload_date': '20150913',
 487             'uploader_url': 'https://twitter.com/freethenipple',
 488             'comment_count': int,
 489             'repost_count': int,
 490             'like_count': int,
 491             'tags': [],
 492             'age_limit': 18,
 493             '_old_archive_ids': ['twitter 643211948184596480'],
 494         },
 495         'skip': 'Requires authentication',
 496     }, {
 497         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 498         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 499         'info_dict': {
 500             'id': '657991469417025536',
 501             'ext': 'mp4',
 502             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 503             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 504             'thumbnail': r're:^https?://.*\.png',
 505             'uploader': 'Gifs',
 506             'uploader_id': 'giphz',
 507         },
 508         'expected_warnings': ['height', 'width'],
 509         'skip': 'Account suspended',
 510     }, {
 511         'url': 'https://twitter.com/starwars/status/665052190608723968',
 512         'info_dict': {
 513             'id': '665052190608723968',
 514             'display_id': '665052190608723968',
 515             'ext': 'mp4',
 516             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 517             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 518             'channel_id': '20106852',
 519             'uploader_id': 'starwars',
 520             'uploader': r're:Star Wars.*',
 521             'timestamp': 1447395772,
 522             'upload_date': '20151113',
 523             'uploader_url': 'https://twitter.com/starwars',
 524             'comment_count': int,
 525             'repost_count': int,
 526             'like_count': int,
 527             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 528             'age_limit': 0,
 529             '_old_archive_ids': ['twitter 665052190608723968'],
 530         },
 531     }, {
 532         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 533         'info_dict': {
 534             'id': '705235433198714880',
 535             'ext': 'mp4',
 536             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 537             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 538             'uploader_id': 'BTNBrentYarina',
 539             'uploader': 'Brent Yarina',
 540             'timestamp': 1456976204,
 541             'upload_date': '20160303',
 542             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 543             'comment_count': int,
 544             'repost_count': int,
 545             'like_count': int,
 546             'tags': [],
 547             'age_limit': 0,
 548         },
 549         'params': {
 550             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 551             # Test case of TwitterCardIE
 552             'skip_download': True,
 553         },
 554         'skip': 'Dead external link',
 555     }, {
 556         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 557         'info_dict': {
 558             'id': '700207414000242688',
 559             'display_id': '700207533655363584',
 560             'ext': 'mp4',
 561             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 562             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 563             'thumbnail': r're:^https?://.*\.jpg',
 564             'channel_id': '1383165541',
 565             'uploader': 'jaydin donte geer',
 566             'uploader_id': 'jaydingeer',
 567             'duration': 30.0,
 568             'timestamp': 1455777459,
 569             'upload_date': '20160218',
 570             'uploader_url': 'https://twitter.com/jaydingeer',
 571             'comment_count': int,
 572             'repost_count': int,
 573             'like_count': int,
 574             'tags': ['Damndaniel'],
 575             'age_limit': 0,
 576             '_old_archive_ids': ['twitter 700207533655363584'],
 577         },
 578     }, {
 579         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 580         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 581         'info_dict': {
 582             'id': 'MIOxnrUteUd',
 583             'ext': 'mp4',
 584             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 585             'uploader': 'TAKUMA',
 586             'uploader_id': '1004126642786242560',
 587             'timestamp': 1402826626,
 588             'upload_date': '20140615',
 589             'thumbnail': r're:^https?://.*\.jpg',
 590             'alt_title': 'Vine by TAKUMA',
 591             'comment_count': int,
 592             'repost_count': int,
 593             'like_count': int,
 594             'view_count': int,
 595         },
 596         'add_ie': ['Vine'],
 597     }, {
 598         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 599         'info_dict': {
 600             'id': '717462543795523584',
 601             'display_id': '719944021058060289',
 602             'ext': 'mp4',
 603             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 604             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 605             'channel_id': '701615052',
 606             'uploader_id': 'CaptainAmerica',
 607             'uploader': 'Captain America',
 608             'duration': 3.17,
 609             'timestamp': 1460483005,
 610             'upload_date': '20160412',
 611             'uploader_url': 'https://twitter.com/CaptainAmerica',
 612             'thumbnail': r're:^https?://.*\.jpg',
 613             'comment_count': int,
 614             'repost_count': int,
 615             'like_count': int,
 616             'tags': [],
 617             'age_limit': 0,
 618             '_old_archive_ids': ['twitter 719944021058060289'],
 619         },
 620     }, {
 621         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 622         'info_dict': {
 623             'id': '1zqKVVlkqLaKB',
 624             'ext': 'mp4',
 625             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 626             'upload_date': '20160923',
 627             'uploader_id': '1PmKqpJdOJQoY',
 628             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 629             'timestamp': 1474613214,
 630             'thumbnail': r're:^https?://.*\.jpg',
 631         },
 632         'add_ie': ['Periscope'],
 633         'skip': 'Broadcast not found',
 634     }, {
 635         # has mp4 formats via mobile API
 636         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 637         'info_dict': {
 638             'id': '852077943283097602',
 639             'ext': 'mp4',
 640             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 641             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 642             'channel_id': '2526757026',
 643             'uploader': 'عالم الأخبار',
 644             'uploader_id': 'news_al3alm',
 645             'duration': 277.4,
 646             'timestamp': 1492000653,
 647             'upload_date': '20170412',
 648             'display_id': '852138619213144067',
 649             'age_limit': 0,
 650             'uploader_url': 'https://twitter.com/news_al3alm',
 651             'thumbnail': r're:^https?://.*\.jpg',
 652             'tags': [],
 653             'repost_count': int,
 654             'like_count': int,
 655             'comment_count': int,
 656             '_old_archive_ids': ['twitter 852138619213144067'],
 657         },
 658     }, {
 659         'url': 'https://twitter.com/i/web/status/910031516746514432',
 660         'info_dict': {
 661             'id': '910030238373089285',
 662             'display_id': '910031516746514432',
 663             'ext': 'mp4',
 664             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 665             'thumbnail': r're:^https?://.*\.jpg',
 666             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 667             'channel_id': '2319432498',
 668             'uploader': 'Préfet de Guadeloupe',
 669             'uploader_id': 'Prefet971',
 670             'duration': 47.48,
 671             'timestamp': 1505803395,
 672             'upload_date': '20170919',
 673             'uploader_url': 'https://twitter.com/Prefet971',
 674             'comment_count': int,
 675             'repost_count': int,
 676             'like_count': int,
 677             'tags': ['Maria'],
 678             'age_limit': 0,
 679             '_old_archive_ids': ['twitter 910031516746514432'],
 680         },
 681         'params': {
 682             'skip_download': True,  # requires ffmpeg
 683         },
 684     }, {
 685         # card via api.twitter.com/1.1/videos/tweet/config
 686         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 687         'info_dict': {
 688             'id': '1001551417340022785',
 689             'display_id': '1001551623938805763',
 690             'ext': 'mp4',
 691             'title': 're:.*?Shep is on a roll today.*?',
 692             'thumbnail': r're:^https?://.*\.jpg',
 693             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 694             'channel_id': '255036353',
 695             'uploader': 'Lis Power',
 696             'uploader_id': 'LisPower1',
 697             'duration': 111.278,
 698             'timestamp': 1527623489,
 699             'upload_date': '20180529',
 700             'uploader_url': 'https://twitter.com/LisPower1',
 701             'comment_count': int,
 702             'repost_count': int,
 703             'like_count': int,
 704             'tags': [],
 705             'age_limit': 0,
 706             '_old_archive_ids': ['twitter 1001551623938805763'],
 707         },
 708         'params': {
 709             'skip_download': True,  # requires ffmpeg
 710         },
 711     }, {
 712         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 713         'info_dict': {
 714             'id': '1087791272830607360',
 715             'display_id': '1087791357756956680',
 716             'ext': 'mp4',
 717             'title': 'X - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 718             'thumbnail': r're:^https?://.*\.jpg',
 719             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 720             'uploader': 'X',
 721             'uploader_id': 'X',
 722             'duration': 61.567,
 723             'timestamp': 1548184644,
 724             'upload_date': '20190122',
 725             'uploader_url': 'https://twitter.com/X',
 726             'comment_count': int,
 727             'repost_count': int,
 728             'like_count': int,
 729             'view_count': int,
 730             'tags': [],
 731             'age_limit': 0,
 732         },
 733         'skip': 'This Tweet is unavailable',
 734     }, {
 735         # not available in Periscope
 736         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 737         'info_dict': {
 738             'id': '1vOGwqejwoWxB',
 739             'ext': 'mp4',
 740             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 741             'uploader': 'Vivi',
 742             'uploader_id': '1eVjYOLGkGrQL',
 743             'thumbnail': r're:^https?://.*\.jpg',
 744             'tags': ['EduTECH2019'],
 745             'view_count': int,
 746         },
 747         'add_ie': ['TwitterBroadcast'],
 748         'skip': 'Broadcast no longer exists',
 749     }, {
 750         # unified card
 751         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 752         'info_dict': {
 753             'id': '1349774757969989634',
 754             'display_id': '1349794411333394432',
 755             'ext': 'mp4',
 756             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 757             'thumbnail': r're:^https?://.*\.jpg',
 758             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 759             'channel_id': '18552281',
 760             'uploader': 'Brooklyn Nets',
 761             'uploader_id': 'BrooklynNets',
 762             'duration': 324.484,
 763             'timestamp': 1610651040,
 764             'upload_date': '20210114',
 765             'uploader_url': 'https://twitter.com/BrooklynNets',
 766             'comment_count': int,
 767             'repost_count': int,
 768             'like_count': int,
 769             'tags': [],
 770             'age_limit': 0,
 771             '_old_archive_ids': ['twitter 1349794411333394432'],
 772         },
 773         'params': {
 774             'skip_download': True,
 775         },
 776     }, {
 777         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 778         'info_dict': {
 779             'id': '1577855447914409984',
 780             'display_id': '1577855540407197696',
 781             'ext': 'mp4',
 782             'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
 783             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 784             'upload_date': '20221006',
 785             'channel_id': '143077138',
 786             'uploader': 'Oshtru',
 787             'uploader_id': 'oshtru',
 788             'uploader_url': 'https://twitter.com/oshtru',
 789             'thumbnail': r're:^https?://.*\.jpg',
 790             'duration': 30.03,
 791             'timestamp': 1665025050,
 792             'comment_count': int,
 793             'repost_count': int,
 794             'like_count': int,
 795             'tags': [],
 796             'age_limit': 0,
 797             '_old_archive_ids': ['twitter 1577855540407197696'],
 798         },
 799         'params': {'skip_download': True},
 800     }, {
 801         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 802         'info_dict': {
 803             'id': '1577719286659006464',
 804             'title': 'Ultima Reload - Test',
 805             'description': 'Test https://t.co/Y3KEZD7Dad',
 806             'channel_id': '168922496',
 807             'uploader': 'Ultima Reload',
 808             'uploader_id': 'UltimaShadowX',
 809             'uploader_url': 'https://twitter.com/UltimaShadowX',
 810             'upload_date': '20221005',
 811             'timestamp': 1664992565,
 812             'comment_count': int,
 813             'repost_count': int,
 814             'like_count': int,
 815             'tags': [],
 816             'age_limit': 0,
 817         },
 818         'playlist_count': 4,
 819         'params': {'skip_download': True},
 820     }, {
 821         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 822         'info_dict': {
 823             'id': '1575559336759263233',
 824             'display_id': '1575560063510810624',
 825             'ext': 'mp4',
 826             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 827             'thumbnail': r're:^https?://.*\.jpg',
 828             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 829             'channel_id': '1094109584',
 830             'uploader': 'Max Olson',
 831             'uploader_id': 'MesoMax919',
 832             'uploader_url': 'https://twitter.com/MesoMax919',
 833             'duration': 21.321,
 834             'timestamp': 1664477766,
 835             'upload_date': '20220929',
 836             'comment_count': int,
 837             'repost_count': int,
 838             'like_count': int,
 839             'tags': ['HurricaneIan'],
 840             'age_limit': 0,
 841             '_old_archive_ids': ['twitter 1575560063510810624'],
 842         },
 843     }, {
 844         # Adult content, fails if not logged in
 845         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 846         'info_dict': {
 847             'id': '1575199163847000068',
 848             'display_id': '1575199173472927762',
 849             'ext': 'mp4',
 850             'title': str,
 851             'description': str,
 852             'channel_id': '1217167793541480450',
 853             'uploader': str,
 854             'uploader_id': 'Rizdraws',
 855             'uploader_url': 'https://twitter.com/Rizdraws',
 856             'upload_date': '20220928',
 857             'timestamp': 1664391723,
 858             'thumbnail': r're:^https?://.+\.jpg',
 859             'like_count': int,
 860             'repost_count': int,
 861             'comment_count': int,
 862             'age_limit': 18,
 863             'tags': [],
 864             '_old_archive_ids': ['twitter 1575199173472927762'],
 865         },
 866         'params': {'skip_download': 'The media could not be played'},
 867         'skip': 'Requires authentication',
 868     }, {
 869         # Playlist result only with graphql API
 870         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 871         'playlist_mincount': 2,
 872         'info_dict': {
 873             'id': '1395079556562706435',
 874             'title': str,
 875             'tags': [],
 876             'channel_id': '21539378',
 877             'uploader': str,
 878             'like_count': int,
 879             'upload_date': '20210519',
 880             'age_limit': 0,
 881             'repost_count': int,
 882             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 883             'uploader_id': 'Srirachachau',
 884             'comment_count': int,
 885             'uploader_url': 'https://twitter.com/Srirachachau',
 886             'timestamp': 1621447860,
 887         },
 888     }, {
 889         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 890         'playlist_mincount': 2,
 891         'info_dict': {
 892             'id': '1578353380363501568',
 893             'title': str,
 894             'channel_id': '2195866214',
 895             'uploader_id': 'DavidToons_',
 896             'repost_count': int,
 897             'like_count': int,
 898             'uploader': str,
 899             'timestamp': 1665143744,
 900             'uploader_url': 'https://twitter.com/DavidToons_',
 901             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 902             'tags': [],
 903             'comment_count': int,
 904             'upload_date': '20221007',
 905             'age_limit': 0,
 906         },
 907     }, {
 908         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 909         'playlist_count': 2,
 910         'info_dict': {
 911             'id': '1578401165338976258',
 912             'title': str,
 913             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 914             'channel_id': '19338359',
 915             'uploader': str,
 916             'uploader_id': 'primevideouk',
 917             'timestamp': 1665155137,
 918             'upload_date': '20221007',
 919             'age_limit': 0,
 920             'uploader_url': 'https://twitter.com/primevideouk',
 921             'comment_count': int,
 922             'repost_count': int,
 923             'like_count': int,
 924             'tags': ['TheRingsOfPower'],
 925         },
 926     }, {
 927         # Twitter Spaces
 928         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 929         'info_dict': {
 930             'id': '1lPJqmBeeNAJb',
 931             'ext': 'm4a',
 932             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 933             'uploader': r're:Monique Camarra.+?',
 934             'uploader_id': 'MoniqueCamarra',
 935             'live_status': 'was_live',
 936             'release_timestamp': 1658417414,
 937             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 938             'timestamp': 1658407771,
 939             'release_date': '20220721',
 940             'upload_date': '20220721',
 941         },
 942         'add_ie': ['TwitterSpaces'],
 943         'params': {'skip_download': 'm3u8'},
 944         'skip': 'Requires authentication',
 945     }, {
 946         # URL specifies video number but --yes-playlist
 947         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 948         'playlist_mincount': 2,
 949         'info_dict': {
 950             'id': '1600649710662213632',
 951             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 952             'timestamp': 1670459604.0,
 953             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 954             'comment_count': int,
 955             'uploader_id': 'CTVJLaidlaw',
 956             'channel_id': '80082014',
 957             'repost_count': int,
 958             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 959             'upload_date': '20221208',
 960             'age_limit': 0,
 961             'uploader': 'Jocelyn Laidlaw',
 962             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 963             'like_count': int,
 964         },
 965     }, {
 966         # URL specifies video number and --no-playlist
 967         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 968         'info_dict': {
 969             'id': '1600649511827013632',
 970             'ext': 'mp4',
 971             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 972             'thumbnail': r're:^https?://.+\.jpg',
 973             'timestamp': 1670459604.0,
 974             'channel_id': '80082014',
 975             'uploader_id': 'CTVJLaidlaw',
 976             'uploader': 'Jocelyn Laidlaw',
 977             'repost_count': int,
 978             'comment_count': int,
 979             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 980             'duration': 102.226,
 981             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 982             'display_id': '1600649710662213632',
 983             'like_count': int,
 984             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 985             'upload_date': '20221208',
 986             'age_limit': 0,
 987             '_old_archive_ids': ['twitter 1600649710662213632'],
 988         },
 989         'params': {'noplaylist': True},
 990     }, {
        # The tweet id resolves to a TweetWithVisibilityResults entity that wraps the actual Tweet;
        # note that the extracted id differs from the id in the URL
 993         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 994         'info_dict': {
 995             'id': '1621117577354424321',
 996             'display_id': '1621117700482416640',
 997             'ext': 'mp4',
 998             'title': '뽀 - 아 최우제 이동속도 봐',
 999             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
1000             'duration': 24.598,
1001             'channel_id': '1281839411068432384',
1002             'uploader': '뽀',
1003             'uploader_id': 's2FAKER',
1004             'uploader_url': 'https://twitter.com/s2FAKER',
1005             'upload_date': '20230202',
1006             'timestamp': 1675339553.0,
1007             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1008             'age_limit': 18,
1009             'tags': [],
1010             'like_count': int,
1011             'repost_count': int,
1012             'comment_count': int,
1013             '_old_archive_ids': ['twitter 1621117700482416640'],
1014         },
1015         'skip': 'Requires authentication',
1016     }, {
1017         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1018         'info_dict': {
1019             'id': '1599108643743473680',
1020             'display_id': '1599108751385972737',
1021             'ext': 'mp4',
1022             'title': '\u06ea - \U0001F48B',
1023             'channel_id': '1347791436809441283',
1024             'uploader_url': 'https://twitter.com/hlo_again',
1025             'like_count': int,
1026             'uploader_id': 'hlo_again',
1027             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1028             'repost_count': int,
1029             'duration': 9.531,
1030             'comment_count': int,
1031             'upload_date': '20221203',
1032             'age_limit': 0,
1033             'timestamp': 1670092210.0,
1034             'tags': [],
1035             'uploader': '\u06ea',
1036             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1037             '_old_archive_ids': ['twitter 1599108751385972737'],
1038         },
1039         'params': {'noplaylist': True},
1040     }, {
1041         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1042         'info_dict': {
1043             'id': '1600009362759733248',
1044             'display_id': '1600009574919962625',
1045             'ext': 'mp4',
1046             'channel_id': '211814412',
1047             'uploader_url': 'https://twitter.com/MunTheShinobi',
1048             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1049             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1050             'age_limit': 0,
1051             'uploader': 'Mün',
1052             'repost_count': int,
1053             'upload_date': '20221206',
1054             'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1055             'comment_count': int,
1056             'like_count': int,
1057             'tags': [],
1058             'uploader_id': 'MunTheShinobi',
1059             'duration': 139.987,
1060             'timestamp': 1670306984.0,
1061             '_old_archive_ids': ['twitter 1600009574919962625'],
1062         },
1063     }, {
1064         # retweeted_status (private)
1065         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1066         'info_dict': {
1067             'id': '1623274794488659969',
1068             'display_id': '1623739803874349067',
1069             'ext': 'mp4',
1070             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
1071             'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1072             'uploader': 'Johnny Bullets',
1073             'uploader_id': 'Johnnybull3ts',
1074             'uploader_url': 'https://twitter.com/Johnnybull3ts',
1075             'age_limit': 0,
1076             'tags': [],
1077             'duration': 8.033,
1078             'timestamp': 1675853859.0,
1079             'upload_date': '20230208',
1080             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1081             'like_count': int,
1082             'repost_count': int,
1083         },
1084         'skip': 'Protected tweet',
1085     }, {
1086         # retweeted_status
1087         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1088         'info_dict': {
1089             'id': '1694928337846538240',
1090             'ext': 'mp4',
1091             'display_id': '1695424220702888009',
1092             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1093             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1094             'channel_id': '15212187',
1095             'uploader': 'Benny Johnson',
1096             'uploader_id': 'bennyjohnson',
1097             'uploader_url': 'https://twitter.com/bennyjohnson',
1098             'age_limit': 0,
1099             'tags': [],
1100             'duration': 45.001,
1101             'timestamp': 1692962814.0,
1102             'upload_date': '20230825',
1103             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1104             'like_count': int,
1105             'repost_count': int,
1106             'comment_count': int,
1107             '_old_archive_ids': ['twitter 1695424220702888009'],
1108         },
1109     }, {
1110         # retweeted_status w/ legacy API
1111         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1112         'info_dict': {
1113             'id': '1694928337846538240',
1114             'ext': 'mp4',
1115             'display_id': '1695424220702888009',
1116             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1117             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1118             'channel_id': '15212187',
1119             'uploader': 'Benny Johnson',
1120             'uploader_id': 'bennyjohnson',
1121             'uploader_url': 'https://twitter.com/bennyjohnson',
1122             'age_limit': 0,
1123             'tags': [],
1124             'duration': 45.001,
1125             'timestamp': 1692962814.0,
1126             'upload_date': '20230825',
1127             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1128             'like_count': int,
1129             'repost_count': int,
1130             '_old_archive_ids': ['twitter 1695424220702888009'],
1131         },
1132         'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1133     }, {
1134         # Broadcast embedded in tweet
1135         'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1136         'info_dict': {
1137             'id': '1rmxPMjLzAXKN',
1138             'ext': 'mp4',
1139             'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1140             'uploader': 'Jessica Dobson',
1141             'uploader_id': 'JessicaDobsonWX',
1142             'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1143             'timestamp': 1701566398,
1144             'upload_date': '20231203',
1145             'live_status': 'was_live',
1146             'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1147             'concurrent_view_count': int,
1148             'view_count': int,
1149         },
1150         'add_ie': ['TwitterBroadcast'],
1151     }, {
1152         # Animated gif and quote tweet video
1153         'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1154         'playlist_mincount': 2,
1155         'info_dict': {
1156             'id': '1696256659889565950',
1157             'title': 'BAKOON - https://t.co/zom968d0a0',
1158             'description': 'https://t.co/zom968d0a0',
1159             'tags': [],
1160             'channel_id': '1263540390',
1161             'uploader': 'BAKOON',
1162             'uploader_id': 'BAKKOOONN',
1163             'uploader_url': 'https://twitter.com/BAKKOOONN',
1164             'age_limit': 18,
1165             'timestamp': 1693254077.0,
1166             'upload_date': '20230828',
1167             'like_count': int,
1168             'comment_count': int,
1169             'repost_count': int,
1170         },
1171         'skip': 'Requires authentication',
1172     }, {
1173         # "stale tweet" with typename "TweetWithVisibilityResults"
1174         'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1175         'md5': '511377ff8dfa7545307084dca4dce319',
1176         'info_dict': {
1177             'id': '1724883339285544960',
1178             'ext': 'mp4',
1179             'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1180             'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1181             'display_id': '1724884212803834154',
1182             'channel_id': '337808606',
1183             'uploader': 'Robert F. Kennedy Jr',
1184             'uploader_id': 'RobertKennedyJr',
1185             'uploader_url': 'https://twitter.com/RobertKennedyJr',
1186             'upload_date': '20231115',
1187             'timestamp': 1700079417.0,
1188             'duration': 341.048,
1189             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1190             'tags': ['Kennedy24'],
1191             'repost_count': int,
1192             'like_count': int,
1193             'comment_count': int,
1194             'age_limit': 0,
1195             '_old_archive_ids': ['twitter 1724884212803834154'],
1196         },
1197     }, {
1198         # x.com
1199         'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1200         'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1201         'info_dict': {
1202             'id': '1790637589910654976',
1203             'ext': 'mp4',
1204             'title': 'Historic Vids - One of the most intense moments in history',
1205             'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1206             'display_id': '1790637656616943991',
1207             'uploader': 'Historic Vids',
1208             'uploader_id': 'historyinmemes',
1209             'uploader_url': 'https://twitter.com/historyinmemes',
1210             'channel_id': '855481986290524160',
1211             'upload_date': '20240515',
1212             'timestamp': 1715756260.0,
1213             'duration': 15.488,
1214             'tags': [],
1215             'comment_count': int,
1216             'repost_count': int,
1217             'like_count': int,
1218             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1219             'age_limit': 0,
1220             '_old_archive_ids': ['twitter 1790637656616943991'],
1221         },
1222     }, {
1223         # onion route
1224         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1225         'only_matching': True,
1226     }, {
1227         # Twitch Clip Embed
1228         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1229         'only_matching': True,
1230     }, {
1231         # promo_video_website card
1232         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1233         'only_matching': True,
1234     }, {
1235         # promo_video_convo card
1236         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1237         'only_matching': True,
1238     }, {
1239         # appplayer card
1240         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1241         'only_matching': True,
1242     }, {
1243         # video_direct_message card
1244         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1245         'only_matching': True,
1246     }, {
1247         # poll2choice_video card
1248         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1249         'only_matching': True,
1250     }, {
1251         # poll3choice_video card
1252         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1253         'only_matching': True,
1254     }, {
1255         # poll4choice_video card
1256         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1257         'only_matching': True,
1258     }]
1259
    # Captures the run of digits that sits between `_video/` and the next `/`.
    # NOTE(review): presumably searched against media URLs to recover a media id;
    # confirm at the call site, which is outside this section.
    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1261
1262     @property
1263     def _GRAPHQL_ENDPOINT(self):
1264         if self.is_logged_in:
1265             return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1266         return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1267
1268     def _graphql_to_legacy(self, data, twid):
1269         result = traverse_obj(data, (
1270             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1271             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
1272             'tweet_results', 'result', ('tweet', None), {dict},
1273         ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
1274             data, ('tweetResult', 'result', {dict}), default={})
1275
1276         typename = result.get('__typename')
1277         if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
1278             self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
1279
1280         if 'tombstone' in result:
1281             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
1282             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
1283         elif typename == 'TweetUnavailable':
1284             reason = result.get('reason')
1285             if reason == 'NsfwLoggedOut':
1286                 self.raise_login_required('NSFW tweet requires authentication')
1287             elif reason == 'Protected':
1288                 self.raise_login_required('You are not authorized to view this protected tweet')
1289             raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
1290         # Result for "stale tweet" needs additional transformation
1291         elif typename == 'TweetWithVisibilityResults':
1292             result = traverse_obj(result, ('tweet', {dict})) or {}
1293
1294         status = result.get('legacy', {})
1295         status.update(traverse_obj(result, {
1296             'user': ('core', 'user_results', 'result', 'legacy'),
1297             'card': ('card', 'legacy'),
1298             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1299             'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
1300         }, expected_type=dict, default={}))
1301
1302         # extra transformations needed since result does not match legacy format
1303         if status.get('retweeted_status'):
1304             status['retweeted_status']['user'] = traverse_obj(status, (
1305                 'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
1306
1307         binding_values = {
1308             binding_value.get('key'): binding_value.get('value')
1309             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
1310         }
1311         if binding_values:
1312             status['card']['binding_values'] = binding_values
1313
1314         return status
1315
1316     def _build_graphql_query(self, media_id):
1317         return {
1318             'variables': {
1319                 'focalTweetId': media_id,
1320                 'includePromotedContent': True,
1321                 'with_rux_injections': False,
1322                 'withBirdwatchNotes': True,
1323                 'withCommunity': True,
1324                 'withDownvotePerspective': False,
1325                 'withQuickPromoteEligibilityTweetFields': True,
1326                 'withReactionsMetadata': False,
1327                 'withReactionsPerspective': False,
1328                 'withSuperFollowsTweetFields': True,
1329                 'withSuperFollowsUserFields': True,
1330                 'withV2Timeline': True,
1331                 'withVoice': True,
1332             },
1333             'features': {
1334                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1335                 'interactive_text_enabled': True,
1336                 'responsive_web_edit_tweet_api_enabled': True,
1337                 'responsive_web_enhance_cards_enabled': True,
1338                 'responsive_web_graphql_timeline_navigation_enabled': False,
1339                 'responsive_web_text_conversations_enabled': False,
1340                 'responsive_web_uc_gql_enabled': True,
1341                 'standardized_nudges_misinfo': True,
1342                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1343                 'tweetypie_unmention_optimization_enabled': True,
1344                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1345                 'verified_phone_label_enabled': False,
1346                 'vibe_api_enabled': True,
1347             },
1348         } if self.is_logged_in else {
1349             'variables': {
1350                 'tweetId': media_id,
1351                 'withCommunity': False,
1352                 'includePromotedContent': False,
1353                 'withVoice': False,
1354             },
1355             'features': {
1356                 'creator_subscriptions_tweet_preview_api_enabled': True,
1357                 'tweetypie_unmention_optimization_enabled': True,
1358                 'responsive_web_edit_tweet_api_enabled': True,
1359                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1360                 'view_counts_everywhere_api_enabled': True,
1361                 'longform_notetweets_consumption_enabled': True,
1362                 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1363                 'tweet_awards_web_tipping_enabled': False,
1364                 'freedom_of_speech_not_reach_fetch_enabled': True,
1365                 'standardized_nudges_misinfo': True,
1366                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1367                 'longform_notetweets_rich_text_read_enabled': True,
1368                 'longform_notetweets_inline_media_enabled': True,
1369                 'responsive_web_graphql_exclude_directive_enabled': True,
1370                 'verified_phone_label_enabled': False,
1371                 'responsive_web_media_download_video_enabled': False,
1372                 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1373                 'responsive_web_graphql_timeline_navigation_enabled': True,
1374                 'responsive_web_enhance_cards_enabled': False,
1375             },
1376             'fieldToggles': {
1377                 'withArticleRichContentState': False,
1378             },
1379         }
1380
1381     def _call_syndication_api(self, twid):
1382         self.report_warning(
1383             'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
1384         status = self._download_json(
1385             'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
1386             headers={'User-Agent': 'Googlebot'}, query={
1387                 'id': twid,
1388                 # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
1389                 'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
1390             })
1391         if not status:
1392             raise ExtractorError('Syndication endpoint returned empty JSON response')
1393         # Transform the result so its structure matches that of legacy/graphql
1394         media = []
1395         for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
1396             detail['id_str'] = traverse_obj(detail, (
1397                 'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
1398             media.append(detail)
1399         status['extended_entities'] = {'media': media}
1400
1401         return status
1402
1403     def _extract_status(self, twid):
1404         if self._selected_api not in ('graphql', 'legacy', 'syndication'):
1405             raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
1406
1407         try:
1408             if self.is_logged_in or self._selected_api == 'graphql':
1409                 status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
1410             elif self._selected_api == 'legacy':
1411                 status = self._call_api(f'statuses/show/{twid}.json', twid, {
1412                     'cards_platform': 'Web-12',
1413                     'include_cards': 1,
1414                     'include_reply_count': 1,
1415                     'include_user_entities': 0,
1416                     'tweet_mode': 'extended',
1417                 })
1418         except ExtractorError as e:
1419             if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
1420                 raise
1421             self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
1422             status = self._call_syndication_api(twid)
1423
1424         if self._selected_api == 'syndication':
1425             status = self._call_syndication_api(twid)
1426
1427         return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1428
    def _real_extract(self, url):
        """Extract the media attached to a single tweet.

        The tweet ID and the optional media index come from the ``id`` and
        ``index`` groups of the URL match.  Metadata shared by every entry is
        built from the status returned by ``_extract_status``; media entries
        come from ``extended_entities`` (of the tweet and of its
        ``quoted_status``) and from the tweet's ``card``.

        Returns a single info dict when exactly one entry is found or when
        ``_yes_playlist`` selects only the URL-specified media, a playlist
        result when several entries are found, or a URL result pointing at the
        tweet's first expanded URL when no entry is found.  If there is no
        usable expanded URL either, ``raise_no_formats`` is called and the bare
        metadata dict is returned (should that call not raise).

        Raises ExtractorError when the URL-specified media is missing or is not
        a video.
        """
        twid, selected_index = self._match_valid_url(url).group('id', 'index')
        status = self._extract_status(twid)

        # Title and description both start out as the tweet text with newlines
        # replaced by spaces ('full_text' is preferred over 'text')
        title = description = traverse_obj(
            status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
        # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
        title = re.sub(r'\s+(https?://[^ ]+)', '', title)
        user = status.get('user') or {}
        uploader = user.get('name')
        if uploader:
            title = f'{uploader} - {title}'
        uploader_id = user.get('screen_name')

        # Metadata common to every entry extracted from this tweet
        info = {
            'id': twid,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': unified_timestamp(status.get('created_at')),
            'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
            'uploader_id': uploader_id,
            'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
            'like_count': int_or_none(status.get('favorite_count')),
            'repost_count': int_or_none(status.get('retweet_count')),
            'comment_count': int_or_none(status.get('reply_count')),
            'age_limit': 18 if status.get('possibly_sensitive') else 0,
            'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
        }

        def extract_from_video_info(media):
            """Build the per-media part of an info dict from one media entity.

            Formats and subtitles are gathered from every entry of
            ``video_info.variants``; thumbnails are derived from the media URL,
            one per entry of ``sizes`` plus an ``orig`` one.
            """
            media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
            self.write_debug(f'Extracting from video info: {media_id}')

            formats = []
            subtitles = {}
            for variant in traverse_obj(media, ('video_info', 'variants', ...)):
                fmts, subs = self._extract_variant_formats(variant, twid)
                subtitles = self._merge_subtitles(subtitles, subs)
                formats.extend(fmts)

            thumbnails = []
            media_url = media.get('media_url_https') or media.get('media_url')
            if media_url:
                def add_thumbnail(name, size):
                    # The size name is passed as the `name` query parameter of
                    # the media URL; dimensions are read from either the
                    # short (`w`/`h`) or long (`width`/`height`) keys
                    thumbnails.append({
                        'id': name,
                        'url': update_url_query(media_url, {'name': name}),
                        'width': int_or_none(size.get('w') or size.get('width')),
                        'height': int_or_none(size.get('h') or size.get('height')),
                    })
                for name, size in media.get('sizes', {}).items():
                    add_thumbnail(name, size)
                add_thumbnail('orig', media.get('original_info') or {})

            return {
                'id': media_id,
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
                # `duration_millis` is divided by 1000 to get seconds
                'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
                # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
                '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
            }

        def extract_from_card_info(card):
            """Yield the entries described by the tweet's card.

            Nothing is yielded when ``card`` is empty.  The card type is the
            last ``:``-separated component of ``card['name']``; known types
            yield URL results (or media entries for ``unified_card``), any
            other type is handled as a VMAP-based player card.
            """
            if not card:
                return

            self.write_debug(f'Extracting from card info: {card.get("url")}')
            binding_values = card['binding_values']

            def get_binding_value(k):
                # Each binding is looked up under '<type>_value', where <type>
                # is the lowercased 'type' field of that binding
                o = binding_values.get(k) or {}
                return try_get(o, lambda x: x[x['type'].lower() + '_value'])

            card_name = card['name'].split(':')[-1]
            if card_name == 'player':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('player_url'),
                }
            elif card_name == 'periscope_broadcast':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('url') or get_binding_value('player_url'),
                    'ie_key': PeriscopeIE.ie_key(),
                }
            elif card_name == 'broadcast':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('broadcast_url'),
                    'ie_key': TwitterBroadcastIE.ie_key(),
                }
            elif card_name == 'audiospace':
                yield {
                    '_type': 'url',
                    'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                    'ie_key': TwitterSpacesIE.ie_key(),
                }
            elif card_name == 'summary':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('card_url'),
                }
            elif card_name == 'unified_card':
                # The binding holds a JSON document whose media entities are
                # processed like regular tweet media
                unified_card = self._parse_json(get_binding_value('unified_card'), twid)
                yield from map(extract_from_video_info, traverse_obj(
                    unified_card, ('media_entities', ...), expected_type=dict))
            # amplify, promo_video_website, promo_video_convo, appplayer,
            # video_direct_message, poll2choice_video, poll3choice_video,
            # poll4choice_video, ...
            else:
                # Amplify cards use their own binding names for the VMAP URL
                # and the content ID; every other card uses the 'player' ones
                is_amplify = card_name == 'amplify'
                vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
                content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
                formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

                thumbnails = []
                for suffix in ('_small', '', '_large', '_x_large', '_original'):
                    image = get_binding_value('player_image' + suffix) or {}
                    image_url = image.get('url')
                    # Skip missing images and placeholder images
                    if not image_url or '/player-placeholder' in image_url:
                        continue
                    thumbnails.append({
                        # The un-suffixed image is labelled 'medium'
                        'id': suffix[1:] if suffix else 'medium',
                        'url': image_url,
                        'width': int_or_none(image.get('width')),
                        'height': int_or_none(image.get('height')),
                    })

                yield {
                    'formats': formats,
                    'subtitles': subtitles,
                    'thumbnails': thumbnails,
                    'duration': int_or_none(get_binding_value(
                        'content_duration_seconds')),
                }

        # All non-photo media of the tweet itself and of its quoted status
        videos = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

        # `_yes_playlist` decides between extracting every entry and extracting
        # only the media selected by the index given in the URL
        if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
            selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
        else:
            # The URL index is 1-based and counts all media (photos included);
            # the first match among the tweet and its quoted status is used
            desired_obj = traverse_obj(status, (
                (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
            if not desired_obj:
                raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
            elif desired_obj.get('type') != 'video':
                raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

            # Restore original archive id and video index in title
            for index, entry in enumerate(videos, 1):
                if entry.get('id') != desired_obj.get('id'):
                    continue
                if index == 1:
                    info['_old_archive_ids'] = [make_archive_id(self, twid)]
                if len(videos) != 1:
                    info['title'] += f' #{index}'
                break

            return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

        # Per-entry data overrides the shared metadata (e.g. the media ID
        # replaces the tweet ID, which is kept as `display_id`)
        entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
        if not entries:
            # No media found: fall back to the first URL linked in the tweet,
            # unless there is none or it is the URL being extracted
            expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
            if not expanded_url or expanded_url == url:
                self.raise_no_formats('No video could be found in this tweet', expected=True)
                return info

            return self.url_result(expanded_url, display_id=twid, **info)

        # The first entry also carries an archive ID built from the tweet ID
        entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

        if len(entries) == 1:
            return entries[0]

        # Number the titles so that playlist entries can be told apart
        for index, entry in enumerate(entries, 1):
            entry['title'] += f' #{index}'

        return self.playlist_result(entries, **info)
1612
1613
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Amplify videos served from ``amp.twimg.com/v/<uuid>``."""

    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from an Amplify player page.

        The VMAP URL, the thumbnail URL and the image/player dimensions are
        all read from ``twitter:*`` meta tags of the downloaded webpage.  The
        player dimensions are applied to the first format, when there is one.

        :param url: Amplify page URL matching ``_VALID_URL``.
        :return: info dict with ``id``, ``title``, ``formats``, ``thumbnails``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read `twitter:<target>:width` / `:height`; either may be None
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing it unconditionally
        # would raise IndexError instead of returning the (empty) formats
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1669
1670
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for broadcasts addressed as ``/i/broadcasts/<id>``."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract metadata and m3u8 formats for one broadcast.

        The broadcast record is fetched from ``broadcasts/show.json``; its
        ``status`` and ``twitter_username`` take precedence over the parsed
        title and uploader ID.  Upcoming broadcasts are returned without
        formats.

        :raises ExtractorError: if the API has no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The format ID is the first `type` query parameter of the playlist URL, if any
        m3u8_query = urllib.parse.parse_qs(urllib.parse.urlparse(m3u8_url).query)
        m3u8_id = m3u8_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1746
1747
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Spaces addressed as ``/i/spaces/<id>``."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'skip': 'No longer available',
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
            'protocol': 'm3u8',  # ffmpeg is forced
            'container': 'm4a_dash',  # audio-only format fixup is applied
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
            'protocol': 'm3u8',  # ffmpeg is forced
            'container': 'm4a_dash',  # audio-only format fixup is applied
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Video Space
        'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
        'info_dict': {
            'id': '1DXGydznBYWKM',
            'ext': 'mp4',
            'title': 'America and Israel’s “special relationship”',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': 'Candace Owens',
            'uploader_id': 'RealCandaceO',
            'live_status': 'was_live',
            'timestamp': 1723931351,
            'upload_date': '20240817',
            'release_timestamp': 1723932000,
            'release_date': '20240817',
            'protocol': 'm3u8_native',  # not ffmpeg, detected as video space
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased `state` of a Space to a `live_status` value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Build the GraphQL request payload used to look up one Space.

        :param space_id: ID of the Space, sent as the ``id`` variable.
        :return: dict with ``variables`` and ``features``.
        """
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Extract metadata and m3u8 formats for one Space.

        Authentication is required.  No formats are extracted for Spaces that
        have not started, or that have ended without replay being available.
        Sources whose URL contains ``audio-space`` are treated as audio-only.

        :raises ExtractorError: if the API returns no data for the Space.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        api_response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = api_response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                is_audio_space = 'audio-space' in source
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a' if is_audio_space else 'mp4',
                    # XXX: Some audio-only Spaces need ffmpeg as downloader
                    entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
                    live=is_live, headers=headers, fatal=False)
                if is_audio_space:
                    audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
                    if not is_live:
                        audio_fields['container'] = 'm4a_dash'
                    for fmt in formats:
                        fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1912
1913
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<code>`` pseudo-URLs) to their target."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to ``url_result``.

        For ``tco:<code>`` input the real ``t.co`` URL is built from
        ``_BASE_URL`` first.  If the request ends up on the unsafe-link
        warning page, the link carried in its ``unsafe_link`` parameter is
        used instead.

        :param url: short URL matching ``_VALID_URL``.
        :return: URL result for the resolved link.
        """
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading prefix; `str.replace` would also remove any
            # later occurrence of the same text inside the target URL
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)