7 from .common
import InfoExtractor
8 from .periscope
import PeriscopeBaseIE
, PeriscopeIE
9 from ..networking
.exceptions
import HTTPError
32 class TwitterBaseIE(InfoExtractor
):
33 _NETRC_MACHINE
= 'twitter'
34 _API_BASE
= 'https://api.x.com/1.1/'
35 _GRAPHQL_API_BASE
= 'https://x.com/i/api/graphql/'
36 _BASE_REGEX
= r
'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
37 _AUTH
= 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
38 _LEGACY_AUTH
= 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
41 _LOGIN_INIT_DATA
= json
.dumps({
44 'debug_overrides': {},
46 'location': 'unknown',
53 'app_download_cta': 1,
54 'check_logged_in_account': 1,
55 'choice_selection': 3,
56 'contacts_live_sync_permission_prompt': 0,
58 'email_verification': 2,
68 'in_app_notification': 1,
70 'js_instrumentation': 1,
72 'notifications_permission_prompt': 2,
74 'open_home_timeline': 1,
76 'phone_verification': 4,
85 'tweet_selection_urt': 1,
88 'user_recommendations_list': 4,
89 'user_recommendations_urt': 1,
93 }, separators
=(',', ':')).encode()
95 def _extract_variant_formats(self
, variant
, video_id
):
96 variant_url
= variant
.get('url')
99 elif '.m3u8' in variant_url
:
100 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
101 variant_url
, video_id
, 'mp4', 'm3u8_native',
102 m3u8_id
='hls', fatal
=False)
103 for f
in traverse_obj(fmts
, lambda _
, v
: v
['vcodec'] == 'none' and v
.get('tbr') is None):
104 if mobj
:= re
.match(r
'hls-[Aa]udio-(?P<bitrate>\d{4,})', f
['format_id']):
105 f
['tbr'] = int_or_none(mobj
.group('bitrate'), 1000)
108 tbr
= int_or_none(dict_get(variant
, ('bitrate', 'bit_rate')), 1000) or None
111 'format_id': join_nonempty('http', tbr
),
114 self
._search
_dimensions
_in
_video
_url
(f
, variant_url
)
117 def _extract_formats_from_vmap_url(self
, vmap_url
, video_id
):
118 vmap_url
= url_or_none(vmap_url
)
121 vmap_data
= self
._download
_xml
(vmap_url
, video_id
)
125 for video_variant
in vmap_data
.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
126 video_variant
.attrib
['url'] = urllib
.parse
.unquote(
127 video_variant
.attrib
['url'])
128 urls
.append(video_variant
.attrib
['url'])
129 fmts
, subs
= self
._extract
_variant
_formats
(
130 video_variant
.attrib
, video_id
)
132 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
133 video_url
= strip_or_none(xpath_text(vmap_data
, './/MediaFile'))
134 if video_url
not in urls
:
135 fmts
, subs
= self
._extract
_variant
_formats
({'url': video_url
}, video_id
)
137 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
138 return formats
, subtitles
141 def _search_dimensions_in_video_url(a_format
, video_url
):
142 m
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
)
145 'width': int(m
.group('width')),
146 'height': int(m
.group('height')),
def is_logged_in(self):
    """Report whether an ``auth_token`` cookie exists for ``_API_BASE``.

    Looks up the cookies for ``self._API_BASE`` via ``self._get_cookies``
    and returns the truthiness of the ``auth_token`` entry as a ``bool``.

    NOTE(review): other methods in this file read this as a plain
    attribute (``self.is_logged_in``, never called), so a property-style
    decorator presumably sits directly above this definition on a line
    that is not visible in this view -- confirm before relying on it.
    """
    cookies = self._get_cookies(self._API_BASE)
    return bool(cookies.get('auth_token'))
153 @functools.cached_property
154 def _selected_api(self
):
155 return self
._configuration
_arg
('api', ['graphql'], ie_key
='Twitter')[0]
157 def _fetch_guest_token(self
, display_id
):
158 guest_token
= traverse_obj(self
._download
_json
(
159 f
'{self._API_BASE}guest/activate.json', display_id
, 'Downloading guest token', data
=b
'',
160 headers
=self
._set
_base
_headers
(legacy
=display_id
and self
._selected
_api
== 'legacy')),
161 ('guest_token', {str}
))
163 raise ExtractorError('Could not retrieve guest token')
166 def _set_base_headers(self
, legacy
=False):
167 bearer_token
= self
._LEGACY
_AUTH
if legacy
and not self
.is_logged_in
else self
._AUTH
169 'Authorization': f
'Bearer {bearer_token}',
170 'x-csrf-token': try_call(lambda: self
._get
_cookies
(self
._API
_BASE
)['ct0'].value
),
173 def _call_login_api(self
, note
, headers
, query
={}, data
=None):
174 response
= self
._download
_json
(
175 f
'{self._API_BASE}onboarding/task.json', None, note
,
176 headers
=headers
, query
=query
, data
=data
, expected_status
=400)
177 error
= traverse_obj(response
, ('errors', 0, 'message', {str}
))
179 raise ExtractorError(f
'Login failed, Twitter API says: {error}', expected
=True)
180 elif traverse_obj(response
, 'status') != 'success':
181 raise ExtractorError('Login was unsuccessful')
183 subtask
= traverse_obj(
184 response
, ('subtasks', ..., 'subtask_id', {str}
), get_all
=False)
186 raise ExtractorError('Twitter API did not return next login subtask')
188 self
._flow
_token
= response
['flow_token']
192 def _perform_login(self
, username
, password
):
193 if self
.is_logged_in
:
196 guest_token
= self
._fetch
_guest
_token
(None)
198 **self
._set
_base
_headers
(),
199 'content-type': 'application/json',
200 'x-guest-token': guest_token
,
201 'x-twitter-client-language': 'en',
202 'x-twitter-active-user': 'yes',
203 'Referer': 'https://x.com/',
204 'Origin': 'https://x.com',
207 def build_login_json(*subtask_inputs
):
209 'flow_token': self
._flow
_token
,
210 'subtask_inputs': subtask_inputs
,
211 }, separators
=(',', ':')).encode()
213 def input_dict(subtask_id
, text
):
215 'subtask_id': subtask_id
,
222 next_subtask
= self
._call
_login
_api
(
223 'Downloading flow token', headers
, query
={'flow_name': 'login'}, data
=self
._LOGIN
_INIT
_DATA
)
225 while not self
.is_logged_in
:
226 if next_subtask
== 'LoginJsInstrumentationSubtask':
227 next_subtask
= self
._call
_login
_api
(
228 'Submitting JS instrumentation response', headers
, data
=build_login_json({
229 'subtask_id': next_subtask
,
230 'js_instrumentation': {
236 elif next_subtask
== 'LoginEnterUserIdentifierSSO':
237 next_subtask
= self
._call
_login
_api
(
238 'Submitting username', headers
, data
=build_login_json({
239 'subtask_id': next_subtask
,
241 'setting_responses': [{
242 'key': 'user_identifier',
253 elif next_subtask
== 'LoginEnterAlternateIdentifierSubtask':
254 next_subtask
= self
._call
_login
_api
(
255 'Submitting alternate identifier', headers
,
256 data
=build_login_json(input_dict(next_subtask
, self
._get
_tfa
_info
(
257 'one of username, phone number or email that was not used as --username'))))
259 elif next_subtask
== 'LoginEnterPassword':
260 next_subtask
= self
._call
_login
_api
(
261 'Submitting password', headers
, data
=build_login_json({
262 'subtask_id': next_subtask
,
264 'password': password
,
269 elif next_subtask
== 'AccountDuplicationCheck':
270 next_subtask
= self
._call
_login
_api
(
271 'Submitting account duplication check', headers
, data
=build_login_json({
272 'subtask_id': next_subtask
,
273 'check_logged_in_account': {
274 'link': 'AccountDuplicationCheck_false',
278 elif next_subtask
== 'LoginTwoFactorAuthChallenge':
279 next_subtask
= self
._call
_login
_api
(
280 'Submitting 2FA token', headers
, data
=build_login_json(input_dict(
281 next_subtask
, self
._get
_tfa
_info
('two-factor authentication token'))))
283 elif next_subtask
== 'LoginAcid':
284 next_subtask
= self
._call
_login
_api
(
285 'Submitting confirmation code', headers
, data
=build_login_json(input_dict(
286 next_subtask
, self
._get
_tfa
_info
('confirmation code sent to your email or phone'))))
288 elif next_subtask
== 'ArkoseLogin':
289 self
.raise_login_required('Twitter is requiring captcha for this login attempt', method
='cookies')
291 elif next_subtask
== 'DenyLoginSubtask':
292 self
.raise_login_required('Twitter rejected this login attempt as suspicious', method
='cookies')
294 elif next_subtask
== 'LoginSuccessSubtask':
295 raise ExtractorError('Twitter API did not grant auth token cookie')
298 raise ExtractorError(f
'Unrecognized subtask ID "{next_subtask}"')
302 def _call_api(self
, path
, video_id
, query
={}, graphql
=False):
303 headers
= self
._set
_base
_headers
(legacy
=not graphql
and self
._selected
_api
== 'legacy')
305 'x-twitter-auth-type': 'OAuth2Session',
306 'x-twitter-client-language': 'en',
307 'x-twitter-active-user': 'yes',
308 } if self
.is_logged_in
else {
309 'x-guest-token': self
._fetch
_guest
_token
(video_id
),
311 allowed_status
= {400, 401, 403, 404} if graphql
else {403}
312 result
= self
._download
_json
(
313 (self
._GRAPHQL
_API
_BASE
if graphql
else self
._API
_BASE
) + path
,
314 video_id
, headers
=headers
, query
=query
, expected_status
=allowed_status
,
315 note
=f
'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
317 if result
.get('errors'):
318 errors
= ', '.join(set(traverse_obj(result
, ('errors', ..., 'message', {str}
))))
319 if errors
and 'not authorized' in errors
:
320 self
.raise_login_required(remove_end(errors
, '.'))
321 raise ExtractorError(f
'Error(s) while querying API: {errors or "Unknown error"}')
325 def _build_graphql_query(self
, media_id
):
326 raise NotImplementedError('Method must be implemented to support GraphQL')
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL ``endpoint`` for ``media_id`` and return its ``data``.

    The mapping returned by ``self._build_graphql_query(media_id)`` is
    turned into query parameters by serialising every value to compact
    JSON (no whitespace after separators). The request itself is made by
    ``self._call_api`` with ``graphql=True``; the ``'data'`` entry of the
    response is extracted with ``traverse_obj``.
    """
    variables = self._build_graphql_query(media_id)
    query = {
        name: json.dumps(payload, separators=(',', ':'))
        for name, payload in variables.items()
    }
    response = self._call_api(endpoint, media_id, query=query, graphql=True)
    return traverse_obj(response, 'data')
334 class TwitterCardIE(InfoExtractor
):
335 IE_NAME
= 'twitter:card'
336 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
339 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
340 # MD5 checksums are different in different places
342 'id': '560070131976392705',
344 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
345 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
346 'uploader': 'Twitter',
347 'uploader_id': 'Twitter',
348 'thumbnail': r
're:^https?://.*\.jpg',
350 'timestamp': 1422366112,
351 'upload_date': '20150127',
353 'comment_count': int,
357 'display_id': '560070183650213889',
358 'uploader_url': 'https://twitter.com/Twitter',
362 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
363 'md5': '7137eca597f72b9abbe61e5ae0161399',
365 'id': '623160978427936768',
367 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
368 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
370 'uploader_id': 'NASA',
371 'timestamp': 1437408129,
372 'upload_date': '20150720',
373 'uploader_url': 'https://twitter.com/NASA',
375 'comment_count': int,
378 'tags': ['PlutoFlyby'],
380 'params': {'format': '[protocol=https]'},
383 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
384 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
388 'title': 'Ubuntu 11.10 Overview',
389 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
390 'upload_date': '20111013',
391 'uploader': 'OMG! UBUNTU!',
392 'uploader_id': 'omgubuntu',
393 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
394 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
395 'channel_follower_count': int,
396 'chapters': 'count:8',
397 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
399 'categories': ['Film & Animation'],
401 'comment_count': int,
402 'availability': 'public',
404 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
407 'channel': 'OMG! UBUNTU!',
408 'playable_in_embed': True,
410 'add_ie': ['Youtube'],
413 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
417 'upload_date': '20151113',
418 'uploader_id': '1189339351084113920',
419 'uploader': 'ArsenalTerje',
420 'title': 'Vine by ArsenalTerje',
421 'timestamp': 1447451307,
422 'alt_title': 'Vine by ArsenalTerje',
423 'comment_count': int,
425 'thumbnail': r
're:^https?://[^?#]+\.jpg',
430 'params': {'skip_download': 'm3u8'},
433 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
434 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
436 'id': '705235433198714880',
438 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
439 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
440 'uploader': 'Brent Yarina',
441 'uploader_id': 'BTNBrentYarina',
442 'timestamp': 1456976204,
443 'upload_date': '20160303',
445 'skip': 'This content is no longer available.',
448 'url': 'https://twitter.com/i/videos/752274308186120192',
449 'only_matching': True,
def _real_extract(self, url):
    """Hand a card/embed URL over to the status-page extractor.

    The numeric id is taken from ``url`` with ``self._match_id`` and a
    ``https://twitter.com/statuses/<id>`` URL is returned through
    ``self.url_result``, tagged with ``TwitterIE.ie_key()`` and the id.
    """
    status_id = self._match_id(url)
    status_url = 'https://twitter.com/statuses/' + status_id
    return self.url_result(status_url, TwitterIE.ie_key(), status_id)
460 class TwitterIE(TwitterBaseIE
):
462 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
465 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
467 'id': '643211870443208704',
468 'display_id': '643211948184596480',
470 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
471 'thumbnail': r
're:^https?://.*\.jpg',
472 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
473 'channel_id': '549749560',
474 'uploader': 'FREE THE NIPPLE',
475 'uploader_id': 'freethenipple',
477 'timestamp': 1442188653,
478 'upload_date': '20150913',
479 'uploader_url': 'https://twitter.com/freethenipple',
480 'comment_count': int,
485 '_old_archive_ids': ['twitter 643211948184596480'],
487 'skip': 'Requires authentication',
489 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
490 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
492 'id': '657991469417025536',
494 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
495 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
496 'thumbnail': r
're:^https?://.*\.png',
498 'uploader_id': 'giphz',
500 'expected_warnings': ['height', 'width'],
501 'skip': 'Account suspended',
503 'url': 'https://twitter.com/starwars/status/665052190608723968',
505 'id': '665052190608723968',
506 'display_id': '665052190608723968',
508 'title': r
're:Star Wars.*A new beginning is coming December 18.*',
509 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
510 'channel_id': '20106852',
511 'uploader_id': 'starwars',
512 'uploader': r
're:Star Wars.*',
513 'timestamp': 1447395772,
514 'upload_date': '20151113',
515 'uploader_url': 'https://twitter.com/starwars',
516 'comment_count': int,
519 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
521 '_old_archive_ids': ['twitter 665052190608723968'],
524 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
526 'id': '705235433198714880',
528 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
529 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
530 'uploader_id': 'BTNBrentYarina',
531 'uploader': 'Brent Yarina',
532 'timestamp': 1456976204,
533 'upload_date': '20160303',
534 'uploader_url': 'https://twitter.com/BTNBrentYarina',
535 'comment_count': int,
542 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
543 # Test case of TwitterCardIE
544 'skip_download': True,
546 'skip': 'Dead external link',
548 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
550 'id': '700207414000242688',
551 'display_id': '700207533655363584',
553 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
554 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
555 'thumbnail': r
're:^https?://.*\.jpg',
556 'channel_id': '1383165541',
557 'uploader': 'jaydin donte geer',
558 'uploader_id': 'jaydingeer',
560 'timestamp': 1455777459,
561 'upload_date': '20160218',
562 'uploader_url': 'https://twitter.com/jaydingeer',
563 'comment_count': int,
566 'tags': ['Damndaniel'],
568 '_old_archive_ids': ['twitter 700207533655363584'],
571 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
572 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
576 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
577 'uploader': 'TAKUMA',
578 'uploader_id': '1004126642786242560',
579 'timestamp': 1402826626,
580 'upload_date': '20140615',
581 'thumbnail': r
're:^https?://.*\.jpg',
582 'alt_title': 'Vine by TAKUMA',
583 'comment_count': int,
590 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
592 'id': '717462543795523584',
593 'display_id': '719944021058060289',
595 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
596 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
597 'channel_id': '701615052',
598 'uploader_id': 'CaptainAmerica',
599 'uploader': 'Captain America',
601 'timestamp': 1460483005,
602 'upload_date': '20160412',
603 'uploader_url': 'https://twitter.com/CaptainAmerica',
604 'thumbnail': r
're:^https?://.*\.jpg',
605 'comment_count': int,
610 '_old_archive_ids': ['twitter 719944021058060289'],
613 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
615 'id': '1zqKVVlkqLaKB',
617 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
618 'upload_date': '20160923',
619 'uploader_id': '1PmKqpJdOJQoY',
620 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
621 'timestamp': 1474613214,
622 'thumbnail': r
're:^https?://.*\.jpg',
624 'add_ie': ['Periscope'],
625 'skip': 'Broadcast not found',
627 # has mp4 formats via mobile API
628 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
630 'id': '852077943283097602',
632 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
633 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
634 'channel_id': '2526757026',
635 'uploader': 'عالم الأخبار',
636 'uploader_id': 'news_al3alm',
638 'timestamp': 1492000653,
639 'upload_date': '20170412',
640 'display_id': '852138619213144067',
642 'uploader_url': 'https://twitter.com/news_al3alm',
643 'thumbnail': r
're:^https?://.*\.jpg',
647 'comment_count': int,
648 '_old_archive_ids': ['twitter 852138619213144067'],
651 'url': 'https://twitter.com/i/web/status/910031516746514432',
653 'id': '910030238373089285',
654 'display_id': '910031516746514432',
656 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
657 'thumbnail': r
're:^https?://.*\.jpg',
658 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
659 'channel_id': '2319432498',
660 'uploader': 'Préfet de Guadeloupe',
661 'uploader_id': 'Prefet971',
663 'timestamp': 1505803395,
664 'upload_date': '20170919',
665 'uploader_url': 'https://twitter.com/Prefet971',
666 'comment_count': int,
671 '_old_archive_ids': ['twitter 910031516746514432'],
674 'skip_download': True, # requires ffmpeg
677 # card via api.twitter.com/1.1/videos/tweet/config
678 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
680 'id': '1001551417340022785',
681 'display_id': '1001551623938805763',
683 'title': 're:.*?Shep is on a roll today.*?',
684 'thumbnail': r
're:^https?://.*\.jpg',
685 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
686 'channel_id': '255036353',
687 'uploader': 'Lis Power',
688 'uploader_id': 'LisPower1',
690 'timestamp': 1527623489,
691 'upload_date': '20180529',
692 'uploader_url': 'https://twitter.com/LisPower1',
693 'comment_count': int,
698 '_old_archive_ids': ['twitter 1001551623938805763'],
701 'skip_download': True, # requires ffmpeg
704 'url': 'https://twitter.com/foobar/status/1087791357756956680',
706 'id': '1087791272830607360',
707 'display_id': '1087791357756956680',
709 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
710 'thumbnail': r
're:^https?://.*\.jpg',
711 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
715 'timestamp': 1548184644,
716 'upload_date': '20190122',
717 'uploader_url': 'https://twitter.com/X',
718 'comment_count': int,
725 'skip': 'This Tweet is unavailable',
727 # not available in Periscope
728 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
730 'id': '1vOGwqejwoWxB',
732 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
734 'uploader_id': '1eVjYOLGkGrQL',
735 'thumbnail': r
're:^https?://.*\.jpg',
736 'tags': ['EduTECH2019'],
739 'add_ie': ['TwitterBroadcast'],
740 'skip': 'Broadcast no longer exists',
743 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
745 'id': '1349774757969989634',
746 'display_id': '1349794411333394432',
748 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
749 'thumbnail': r
're:^https?://.*\.jpg',
750 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
751 'channel_id': '18552281',
752 'uploader': 'Brooklyn Nets',
753 'uploader_id': 'BrooklynNets',
755 'timestamp': 1610651040,
756 'upload_date': '20210114',
757 'uploader_url': 'https://twitter.com/BrooklynNets',
758 'comment_count': int,
763 '_old_archive_ids': ['twitter 1349794411333394432'],
766 'skip_download': True,
769 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
771 'id': '1577855447914409984',
772 'display_id': '1577855540407197696',
774 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
775 'description': 'md5:b9c3699335447391d11753ab21c70a74',
776 'upload_date': '20221006',
777 'channel_id': '143077138',
778 'uploader': 'Oshtru',
779 'uploader_id': 'oshtru',
780 'uploader_url': 'https://twitter.com/oshtru',
781 'thumbnail': r
're:^https?://.*\.jpg',
783 'timestamp': 1665025050,
784 'comment_count': int,
789 '_old_archive_ids': ['twitter 1577855540407197696'],
791 'params': {'skip_download': True},
793 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
795 'id': '1577719286659006464',
796 'title': 'Ultima Reload - Test',
797 'description': 'Test https://t.co/Y3KEZD7Dad',
798 'channel_id': '168922496',
799 'uploader': 'Ultima Reload',
800 'uploader_id': 'UltimaShadowX',
801 'uploader_url': 'https://twitter.com/UltimaShadowX',
802 'upload_date': '20221005',
803 'timestamp': 1664992565,
804 'comment_count': int,
811 'params': {'skip_download': True},
813 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
815 'id': '1575559336759263233',
816 'display_id': '1575560063510810624',
818 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
819 'thumbnail': r
're:^https?://.*\.jpg',
820 'description': 'md5:95aea692fda36a12081b9629b02daa92',
821 'channel_id': '1094109584',
822 'uploader': 'Max Olson',
823 'uploader_id': 'MesoMax919',
824 'uploader_url': 'https://twitter.com/MesoMax919',
826 'timestamp': 1664477766,
827 'upload_date': '20220929',
828 'comment_count': int,
831 'tags': ['HurricaneIan'],
833 '_old_archive_ids': ['twitter 1575560063510810624'],
836 # Adult content, fails if not logged in
837 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
839 'id': '1575199163847000068',
840 'display_id': '1575199173472927762',
844 'channel_id': '1217167793541480450',
846 'uploader_id': 'Rizdraws',
847 'uploader_url': 'https://twitter.com/Rizdraws',
848 'upload_date': '20220928',
849 'timestamp': 1664391723,
850 'thumbnail': r
're:^https?://.+\.jpg',
853 'comment_count': int,
856 '_old_archive_ids': ['twitter 1575199173472927762'],
858 'params': {'skip_download': 'The media could not be played'},
859 'skip': 'Requires authentication',
861 # Playlist result only with graphql API
862 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
863 'playlist_mincount': 2,
865 'id': '1395079556562706435',
868 'channel_id': '21539378',
871 'upload_date': '20210519',
874 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
875 'uploader_id': 'Srirachachau',
876 'comment_count': int,
877 'uploader_url': 'https://twitter.com/Srirachachau',
878 'timestamp': 1621447860,
881 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
882 'playlist_mincount': 2,
884 'id': '1578353380363501568',
886 'channel_id': '2195866214',
887 'uploader_id': 'DavidToons_',
891 'timestamp': 1665143744,
892 'uploader_url': 'https://twitter.com/DavidToons_',
893 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
895 'comment_count': int,
896 'upload_date': '20221007',
900 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
903 'id': '1578401165338976258',
905 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
906 'channel_id': '19338359',
908 'uploader_id': 'primevideouk',
909 'timestamp': 1665155137,
910 'upload_date': '20221007',
912 'uploader_url': 'https://twitter.com/primevideouk',
913 'comment_count': int,
916 'tags': ['TheRingsOfPower'],
920 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
922 'id': '1lPJqmBeeNAJb',
924 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
925 'uploader': r
're:Monique Camarra.+?',
926 'uploader_id': 'MoniqueCamarra',
927 'live_status': 'was_live',
928 'release_timestamp': 1658417414,
929 'description': r
're:Twitter Space participated by Sergej Sumlenny.+',
930 'timestamp': 1658407771,
931 'release_date': '20220721',
932 'upload_date': '20220721',
934 'add_ie': ['TwitterSpaces'],
935 'params': {'skip_download': 'm3u8'},
937 # URL specifies video number but --yes-playlist
938 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
939 'playlist_mincount': 2,
941 'id': '1600649710662213632',
942 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
943 'timestamp': 1670459604.0,
944 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
945 'comment_count': int,
946 'uploader_id': 'CTVJLaidlaw',
947 'channel_id': '80082014',
949 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
950 'upload_date': '20221208',
952 'uploader': 'Jocelyn Laidlaw',
953 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
957 # URL specifies video number and --no-playlist
958 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
960 'id': '1600649511827013632',
962 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
963 'thumbnail': r
're:^https?://.+\.jpg',
964 'timestamp': 1670459604.0,
965 'channel_id': '80082014',
966 'uploader_id': 'CTVJLaidlaw',
967 'uploader': 'Jocelyn Laidlaw',
969 'comment_count': int,
970 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
972 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
973 'display_id': '1600649710662213632',
975 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
976 'upload_date': '20221208',
978 '_old_archive_ids': ['twitter 1600649710662213632'],
980 'params': {'noplaylist': True},
982 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
983 # note the id different between extraction and url
984 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
986 'id': '1621117577354424321',
987 'display_id': '1621117700482416640',
989 'title': '뽀 - 아 최우제 이동속도 봐',
990 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
992 'channel_id': '1281839411068432384',
994 'uploader_id': 's2FAKER',
995 'uploader_url': 'https://twitter.com/s2FAKER',
996 'upload_date': '20230202',
997 'timestamp': 1675339553.0,
998 'thumbnail': r
're:https?://pbs\.twimg\.com/.+',
1002 'repost_count': int,
1003 'comment_count': int,
1004 '_old_archive_ids': ['twitter 1621117700482416640'],
1006 'skip': 'Requires authentication',
1008 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1010 'id': '1599108643743473680',
1011 'display_id': '1599108751385972737',
1013 'title': '\u06ea - \U0001F48B',
1014 'channel_id': '1347791436809441283',
1015 'uploader_url': 'https://twitter.com/hlo_again',
1017 'uploader_id': 'hlo_again',
1018 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1019 'repost_count': int,
1021 'comment_count': int,
1022 'upload_date': '20221203',
1024 'timestamp': 1670092210.0,
1026 'uploader': '\u06ea',
1027 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1028 '_old_archive_ids': ['twitter 1599108751385972737'],
1030 'params': {'noplaylist': True},
1032 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1034 'id': '1600009362759733248',
1035 'display_id': '1600009574919962625',
1037 'channel_id': '211814412',
1038 'uploader_url': 'https://twitter.com/MunTheShinobi',
1039 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1040 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1043 'repost_count': int,
1044 'upload_date': '20221206',
1045 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1046 'comment_count': int,
1049 'uploader_id': 'MunTheShinobi',
1050 'duration': 139.987,
1051 'timestamp': 1670306984.0,
1052 '_old_archive_ids': ['twitter 1600009574919962625'],
1055 # retweeted_status (private)
1056 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1058 'id': '1623274794488659969',
1059 'display_id': '1623739803874349067',
1061 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1062 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1063 'uploader': 'Johnny Bullets',
1064 'uploader_id': 'Johnnybull3ts',
1065 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1069 'timestamp': 1675853859.0,
1070 'upload_date': '20230208',
1071 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1073 'repost_count': int,
1075 'skip': 'Protected tweet',
1078 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1080 'id': '1694928337846538240',
1082 'display_id': '1695424220702888009',
1083 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1084 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1085 'channel_id': '15212187',
1086 'uploader': 'Benny Johnson',
1087 'uploader_id': 'bennyjohnson',
1088 'uploader_url': 'https://twitter.com/bennyjohnson',
1092 'timestamp': 1692962814.0,
1093 'upload_date': '20230825',
1094 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1096 'repost_count': int,
1097 'comment_count': int,
1098 '_old_archive_ids': ['twitter 1695424220702888009'],
1101 # retweeted_status w/ legacy API
1102 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1104 'id': '1694928337846538240',
1106 'display_id': '1695424220702888009',
1107 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1108 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1109 'channel_id': '15212187',
1110 'uploader': 'Benny Johnson',
1111 'uploader_id': 'bennyjohnson',
1112 'uploader_url': 'https://twitter.com/bennyjohnson',
1116 'timestamp': 1692962814.0,
1117 'upload_date': '20230825',
1118 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1120 'repost_count': int,
1121 '_old_archive_ids': ['twitter 1695424220702888009'],
1123 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1125 # Broadcast embedded in tweet
1126 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1128 'id': '1rmxPMjLzAXKN',
1130 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1131 'uploader': 'Jessica Dobson',
1132 'uploader_id': 'JessicaDobsonWX',
1133 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1134 'timestamp': 1701566398,
1135 'upload_date': '20231203',
1136 'live_status': 'was_live',
1137 'thumbnail': r
're:https://[^/]+pscp\.tv/.+\.jpg',
1138 'concurrent_view_count': int,
1141 'add_ie': ['TwitterBroadcast'],
1143 # Animated gif and quote tweet video
1144 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1145 'playlist_mincount': 2,
1147 'id': '1696256659889565950',
1148 'title': 'BAKOON - https://t.co/zom968d0a0',
1149 'description': 'https://t.co/zom968d0a0',
1151 'channel_id': '1263540390',
1152 'uploader': 'BAKOON',
1153 'uploader_id': 'BAKKOOONN',
1154 'uploader_url': 'https://twitter.com/BAKKOOONN',
1156 'timestamp': 1693254077.0,
1157 'upload_date': '20230828',
1159 'comment_count': int,
1160 'repost_count': int,
1162 'skip': 'Requires authentication',
1164 # "stale tweet" with typename "TweetWithVisibilityResults"
1165 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1166 'md5': '511377ff8dfa7545307084dca4dce319',
1168 'id': '1724883339285544960',
1170 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1171 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1172 'display_id': '1724884212803834154',
1173 'channel_id': '337808606',
1174 'uploader': 'Robert F. Kennedy Jr',
1175 'uploader_id': 'RobertKennedyJr',
1176 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1177 'upload_date': '20231115',
1178 'timestamp': 1700079417.0,
1179 'duration': 341.048,
1180 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1181 'tags': ['Kennedy24'],
1182 'repost_count': int,
1184 'comment_count': int,
1186 '_old_archive_ids': ['twitter 1724884212803834154'],
1190 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1191 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1193 'id': '1790637589910654976',
1195 'title': 'Historic Vids - One of the most intense moments in history',
1196 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1197 'display_id': '1790637656616943991',
1198 'uploader': 'Historic Vids',
1199 'uploader_id': 'historyinmemes',
1200 'uploader_url': 'https://twitter.com/historyinmemes',
1201 'channel_id': '855481986290524160',
1202 'upload_date': '20240515',
1203 'timestamp': 1715756260.0,
1206 'comment_count': int,
1207 'repost_count': int,
1209 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1211 '_old_archive_ids': ['twitter 1790637656616943991'],
1215 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1216 'only_matching': True,
1219 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1220 'only_matching': True,
1222 # promo_video_website card
1223 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1224 'only_matching': True,
1226 # promo_video_convo card
1227 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1228 'only_matching': True,
1231 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1232 'only_matching': True,
1234 # video_direct_message card
1235 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1236 'only_matching': True,
1238 # poll2choice_video card
1239 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1240 'only_matching': True,
1242 # poll3choice_video card
1243 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1244 'only_matching': True,
1246 # poll4choice_video card
1247 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1248 'only_matching': True,
1251 _MEDIA_ID_RE
= re
.compile(r
'_video/(\d+)/')
1254 def _GRAPHQL_ENDPOINT(self
):
1255 if self
.is_logged_in
:
1256 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1257 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
# Convert a GraphQL tweet response (`data`) for tweet id `twid` into the dict layout of the legacy API.
# Raises ExtractorError when the result carries a tombstone or is typed 'TweetUnavailable';
# for the 'NsfwLoggedOut' and 'Protected' reasons self.raise_login_required() is called first.
# NOTE(review): no `return` and no opening line for the `binding_values` mapping are visible in this
# view of the method; they appear to be elided here - confirm against the complete file.
1259 def _graphql_to_legacy(self
, data
, twid
):
# Logged in: look up the entry whose entryId is 'tweet-<twid>'; otherwise read 'tweetResult' -> 'result'
1260 result
= traverse_obj(data
, (
1261 'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1262 lambda _
, v
: v
['entryId'] == f
'tweet-{twid}', 'content', 'itemContent',
1263 'tweet_results', 'result', ('tweet', None), {dict}
,
1264 ), default
={}, get_all
=False) if self
.is_logged_in
else traverse_obj(
1265 data
, ('tweetResult', 'result', {dict}
), default
={})
# An unrecognised typename only produces a one-time warning; processing continues
1267 typename
= result
.get('__typename')
1268 if typename
not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
1269 self
.report_warning(f
'Unknown typename: {typename}', twid
, only_once
=True)
# A tombstone's text (minus a trailing '. Learn more') becomes the error message
1271 if 'tombstone' in result
:
1272 cause
= remove_end(traverse_obj(result
, ('tombstone', 'text', 'text', {str}
)), '. Learn more')
1273 raise ExtractorError(f
'Twitter API says: {cause or "Unknown error"}', expected
=True)
1274 elif typename
== 'TweetUnavailable':
1275 reason
= result
.get('reason')
1276 if reason
== 'NsfwLoggedOut':
1277 self
.raise_login_required('NSFW tweet requires authentication')
1278 elif reason
== 'Protected':
1279 self
.raise_login_required('You are not authorized to view this protected tweet')
1280 raise ExtractorError(reason
or 'Requested tweet is unavailable', expected
=True)
1281 # Result for "stale tweet" needs additional transformation
1282 elif typename
== 'TweetWithVisibilityResults':
1283 result
= traverse_obj(result
, ('tweet', {dict}
)) or {}
# Start from the result's 'legacy' dict (updated in place) and merge in the user, card,
# quoted-status and retweeted-status dicts found elsewhere in the result
1285 status
= result
.get('legacy', {})
1286 status
.update(traverse_obj(result
, {
1287 'user': ('core', 'user_results', 'result', 'legacy'),
1288 'card': ('card', 'legacy'),
1289 'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1290 'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
1291 }, expected_type
=dict, default
={}))
1293 # extra transformations needed since result does not match legacy format
1294 if status
.get('retweeted_status'):
1295 status
['retweeted_status']['user'] = traverse_obj(status
, (
1296 'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}
)) or {}
# Each card binding value contributes one 'key' -> 'value' pair to the mapping stored on the card
1299 binding_value
.get('key'): binding_value
.get('value')
1300 for binding_value
in traverse_obj(status
, ('card', 'binding_values', ..., {dict}
))
1303 status
['card']['binding_values'] = binding_values
# Build the GraphQL query payload for the tweet identified by `media_id`.
# Two alternative payloads are written out; `self.is_logged_in` chooses between them.
# NOTE(review): the lines that open/close the surrounding dict literals and name the groups
# of flags are not visible in this view, so the grouping of the keys below cannot be confirmed here.
1307 def _build_graphql_query(self
, media_id
):
# Payload selected when self.is_logged_in is true (the tweet id is passed as 'focalTweetId')
1310 'focalTweetId': media_id
,
1311 'includePromotedContent': True,
1312 'with_rux_injections': False,
1313 'withBirdwatchNotes': True,
1314 'withCommunity': True,
1315 'withDownvotePerspective': False,
1316 'withQuickPromoteEligibilityTweetFields': True,
1317 'withReactionsMetadata': False,
1318 'withReactionsPerspective': False,
1319 'withSuperFollowsTweetFields': True,
1320 'withSuperFollowsUserFields': True,
1321 'withV2Timeline': True,
1325 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1326 'interactive_text_enabled': True,
1327 'responsive_web_edit_tweet_api_enabled': True,
1328 'responsive_web_enhance_cards_enabled': True,
1329 'responsive_web_graphql_timeline_navigation_enabled': False,
1330 'responsive_web_text_conversations_enabled': False,
1331 'responsive_web_uc_gql_enabled': True,
1332 'standardized_nudges_misinfo': True,
1333 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1334 'tweetypie_unmention_optimization_enabled': True,
1335 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1336 'verified_phone_label_enabled': False,
1337 'vibe_api_enabled': True,
1339 } if self
.is_logged_in
else {
# Payload selected when not logged in (the tweet id is passed as 'tweetId')
1341 'tweetId': media_id
,
1342 'withCommunity': False,
1343 'includePromotedContent': False,
1347 'creator_subscriptions_tweet_preview_api_enabled': True,
1348 'tweetypie_unmention_optimization_enabled': True,
1349 'responsive_web_edit_tweet_api_enabled': True,
1350 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1351 'view_counts_everywhere_api_enabled': True,
1352 'longform_notetweets_consumption_enabled': True,
1353 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1354 'tweet_awards_web_tipping_enabled': False,
1355 'freedom_of_speech_not_reach_fetch_enabled': True,
1356 'standardized_nudges_misinfo': True,
1357 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1358 'longform_notetweets_rich_text_read_enabled': True,
1359 'longform_notetweets_inline_media_enabled': True,
1360 'responsive_web_graphql_exclude_directive_enabled': True,
1361 'verified_phone_label_enabled': False,
1362 'responsive_web_media_download_video_enabled': False,
1363 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1364 'responsive_web_graphql_timeline_navigation_enabled': True,
1365 'responsive_web_enhance_cards_enabled': False,
1368 'withArticleRichContentState': False,
# Fetch tweet `twid` from the syndication endpoint and reshape its media details so that
# they are available under status['extended_entities']['media'].
# A one-time warning is emitted because this endpoint does not expose all metadata or media.
# Raises ExtractorError with 'Syndication endpoint returned empty JSON response'.
# NOTE(review): the condition guarding that raise, the initialisation of `media` and the
# final return are not visible in this view - confirm against the complete file.
1372 def _call_syndication_api(self
, twid
):
1373 self
.report_warning(
1374 'Not all metadata or media is available via syndication endpoint', twid
, only_once
=True)
# The request is sent with a 'Googlebot' User-Agent and a random 10-character token
1375 status
= self
._download
_json
(
1376 'https://cdn.syndication.twimg.com/tweet-result', twid
, 'Downloading syndication JSON',
1377 headers
={'User-Agent': 'Googlebot'}, query
={
1379 # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
1380 'token': ''.join(random
.choices('123456789abcdefghijklmnopqrstuvwxyz', k
=10)),
1383 raise ExtractorError('Syndication endpoint returned empty JSON response')
1384 # Transform the result so its structure matches that of legacy/graphql
# Every 'mediaDetails' dict gets an 'id_str': the id captured by _MEDIA_ID_RE from one of its
# video variant URLs, falling back to the tweet id when none matches
1386 for detail
in traverse_obj(status
, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}
)):
1387 detail
['id_str'] = traverse_obj(detail
, (
1388 'video_info', 'variants', ..., 'url', {self
._MEDIA
_ID
_RE
.search
}, 1), get_all
=False) or twid
1389 media
.append(detail
)
1390 status
['extended_entities'] = {'media': media
}
# Fetch the status data for tweet `twid` through the API named by self._selected_api.
# Raises ExtractorError when the selection is not one of 'graphql', 'legacy' or 'syndication'.
# The result is the 'retweeted_status' dict when traverse_obj finds one, else the status itself, else {}.
# NOTE(review): the `try:` matching the `except` below and the statement that follows the
# 429 check are not visible in this view - confirm against the complete file.
1394 def _extract_status(self
, twid
):
1395 if self
._selected
_api
not in ('graphql', 'legacy', 'syndication'):
1396 raise ExtractorError(f
'{self._selected_api!r} is not a valid API selection', expected
=True)
# Being logged in takes the GraphQL path regardless of the selected API
1399 if self
.is_logged_in
or self
._selected
_api
== 'graphql':
1400 status
= self
._graphql
_to
_legacy
(self
._call
_graphql
_api
(self
._GRAPHQL
_ENDPOINT
, twid
), twid
)
1401 elif self
._selected
_api
== 'legacy':
1402 status
= self
._call
_api
(f
'statuses/show/{twid}.json', twid
, {
1403 'cards_platform': 'Web-12',
1405 'include_reply_count': 1,
1406 'include_user_entities': 0,
1407 'tweet_mode': 'extended',
# The check below singles out errors caused by an HTTP 429; the code after it warns about the
# rate limit and uses the syndication endpoint instead
1409 except ExtractorError
as e
:
1410 if not isinstance(e
.cause
, HTTPError
) or not e
.cause
.status
== 429:
1412 self
.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
1413 status
= self
._call
_syndication
_api
(twid
)
1415 if self
._selected
_api
== 'syndication':
1416 status
= self
._call
_syndication
_api
(twid
)
1418 return traverse_obj(status
, 'retweeted_status', None, expected_type
=dict) or {}
# Extract the media attached to the tweet addressed by `url`.
# The visible return paths yield: a single info dict for a URL-specified media index, a
# url_result for the tweet's first expanded URL, or a playlist built from all entries.
# Raises ExtractorError when the URL-specified media index is unavailable or is not a video.
# NOTE(review): several structural lines (dict openers, else/continue/break/return lines) are
# not visible in this view of the method - confirm details against the complete file.
1420 def _real_extract(self
, url
):
1421 twid
, selected_index
= self
._match
_valid
_url
(url
).group('id', 'index')
1422 status
= self
._extract
_status
(twid
)
# The tweet text ('full_text' or 'text') with newlines replaced by spaces is both title and description
1424 title
= description
= traverse_obj(
1425 status
, (('full_text', 'text'), {lambda x
: x
.replace('\n', ' ')}), get_all
=False) or ''
1426 # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
1427 title
= re
.sub(r
'\s+(https?://[^ ]+)', '', title
)
1428 user
= status
.get('user') or {}
1429 uploader
= user
.get('name')
1431 title
= f
'{uploader} - {title}'
1432 uploader_id
= user
.get('screen_name')
# Metadata fields shared by every entry produced for this tweet
1437 'description': description
,
1438 'uploader': uploader
,
1439 'timestamp': unified_timestamp(status
.get('created_at')),
1440 'channel_id': str_or_none(status
.get('user_id_str')) or str_or_none(user
.get('id_str')),
1441 'uploader_id': uploader_id
,
1442 'uploader_url': format_field(uploader_id
, None, 'https://twitter.com/%s'),
1443 'like_count': int_or_none(status
.get('favorite_count')),
1444 'repost_count': int_or_none(status
.get('retweet_count')),
1445 'comment_count': int_or_none(status
.get('reply_count')),
1446 'age_limit': 18 if status
.get('possibly_sensitive') else 0,
1447 'tags': traverse_obj(status
, ('entities', 'hashtags', ..., 'text')),
# Helper: build one entry from a media object - formats and subtitles from its
# 'video_info' variants, thumbnails from its 'sizes' and 'original_info'
1450 def extract_from_video_info(media
):
1451 media_id
= traverse_obj(media
, 'id_str', 'id', expected_type
=str_or_none
)
1452 self
.write_debug(f
'Extracting from video info: {media_id}')
1456 for variant
in traverse_obj(media
, ('video_info', 'variants', ...)):
1457 fmts
, subs
= self
._extract
_variant
_formats
(variant
, twid
)
1458 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
1459 formats
.extend(fmts
)
1462 media_url
= media
.get('media_url_https') or media
.get('media_url')
# Each thumbnail URL is the media URL with a 'name' query parameter naming the size variant
1464 def add_thumbnail(name
, size
):
1467 'url': update_url_query(media_url
, {'name': name
}),
1468 'width': int_or_none(size
.get('w') or size
.get('width')),
1469 'height': int_or_none(size
.get('h') or size
.get('height')),
1471 for name
, size
in media
.get('sizes', {}).items():
1472 add_thumbnail(name
, size
)
1473 add_thumbnail('orig', media
.get('original_info') or {})
1478 'subtitles': subtitles
,
1479 'thumbnails': thumbnails
,
1480 'view_count': traverse_obj(media
, ('mediaStats', 'viewCount', {int_or_none}
)), # No longer available
1481 'duration': float_or_none(traverse_obj(media
, ('video_info', 'duration_millis')), 1000),
1482 # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
1483 '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
# Helper (generator): produce entries from a tweet card, dispatching on the part of the
# card name after its last ':'
1486 def extract_from_card_info(card
):
1490 self
.write_debug(f
'Extracting from card info: {card.get("url")}')
1491 binding_values
= card
['binding_values']
# A binding value is stored under '<type>_value', where <type> is its lower-cased 'type' field
1493 def get_binding_value(k
):
1494 o
= binding_values
.get(k
) or {}
1495 return try_get(o
, lambda x
: x
[x
['type'].lower() + '_value'])
1497 card_name
= card
['name'].split(':')[-1]
1498 if card_name
== 'player':
1501 'url': get_binding_value('player_url'),
1503 elif card_name
== 'periscope_broadcast':
1506 'url': get_binding_value('url') or get_binding_value('player_url'),
1507 'ie_key': PeriscopeIE
.ie_key(),
1509 elif card_name
== 'broadcast':
1512 'url': get_binding_value('broadcast_url'),
1513 'ie_key': TwitterBroadcastIE
.ie_key(),
1515 elif card_name
== 'audiospace':
1518 'url': f
'https://twitter.com/i/spaces/{get_binding_value("id")}',
1519 'ie_key': TwitterSpacesIE
.ie_key(),
1521 elif card_name
== 'summary':
1524 'url': get_binding_value('card_url'),
1526 elif card_name
== 'unified_card':
1527 unified_card
= self
._parse
_json
(get_binding_value('unified_card'), twid
)
1528 yield from map(extract_from_video_info
, traverse_obj(
1529 unified_card
, ('media_entities', ...), expected_type
=dict))
1530 # amplify, promo_video_website, promo_video_convo, appplayer,
1531 # video_direct_message, poll2choice_video, poll3choice_video,
1532 # poll4choice_video, ...
# 'amplify' cards carry their VMAP URL and content id under amplify-specific keys; the other
# cards handled here use 'player_stream_url' and 'player_content_id'
1534 is_amplify
= card_name
== 'amplify'
1535 vmap_url
= get_binding_value('amplify_url_vmap') if is_amplify
else get_binding_value('player_stream_url')
1536 content_id
= get_binding_value('%s_content_id' % (card_name
if is_amplify
else 'player'))
1537 formats
, subtitles
= self
._extract
_formats
_from
_vmap
_url
(vmap_url
, content_id
or twid
)
# Player images are looked up per size suffix; the un-suffixed image gets the id 'medium'
1540 for suffix
in ('_small', '', '_large', '_x_large', '_original'):
1541 image
= get_binding_value('player_image' + suffix
) or {}
1542 image_url
= image
.get('url')
1543 if not image_url
or '/player-placeholder' in image_url
:
1546 'id': suffix
[1:] if suffix
else 'medium',
1548 'width': int_or_none(image
.get('width')),
1549 'height': int_or_none(image
.get('height')),
1554 'subtitles': subtitles
,
1555 'thumbnails': thumbnails
,
1556 'duration': int_or_none(get_binding_value(
1557 'content_duration_seconds')),
# Media dicts whose 'type' is not 'photo', read from the 'extended_entities' -> 'media' lists
1560 videos
= traverse_obj(status
, (
1561 (None, 'quoted_status'), 'extended_entities', 'media', lambda _
, m
: m
['type'] != 'photo', {dict}
))
1563 if self
._yes
_playlist
(twid
, selected_index
, video_label
='URL-specified video number'):
1564 selected_entries
= (*map(extract_from_video_info
, videos
), *extract_from_card_info(status
.get('card')))
# The URL-specified index is 1-based; it is converted to a 0-based position in the media list
1566 desired_obj
= traverse_obj(status
, (
1567 (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index
) - 1, {dict}
), get_all
=False)
1569 raise ExtractorError(f
'Video #{selected_index} is unavailable', expected
=True)
1570 elif desired_obj
.get('type') != 'video':
1571 raise ExtractorError(f
'Media #{selected_index} is not a video', expected
=True)
1573 # Restore original archive id and video index in title
1574 for index
, entry
in enumerate(videos
, 1):
1575 if entry
.get('id') != desired_obj
.get('id'):
1578 info
['_old_archive_ids'] = [make_archive_id(self
, twid
)]
1579 if len(videos
) != 1:
1580 info
['title'] += f
' #{index}'
1583 return {**info
, **extract_from_video_info(desired_obj
), 'display_id': twid
}
# Every selected entry inherits the shared tweet metadata and uses the tweet id as display_id
1585 entries
= [{**info
, **data
, 'display_id': twid
} for data
in selected_entries
]
# The tweet's first expanded URL is handed to url_result unless it is missing or equals `url`
1587 expanded_url
= traverse_obj(status
, ('entities', 'urls', 0, 'expanded_url'), expected_type
=url_or_none
)
1588 if not expanded_url
or expanded_url
== url
:
1589 self
.raise_no_formats('No video could be found in this tweet', expected
=True)
1592 return self
.url_result(expanded_url
, display_id
=twid
, **info
)
1594 entries
[0]['_old_archive_ids'] = [make_archive_id(self
, twid
)]
1596 if len(entries
) == 1:
# Each entry title gets a ' #<n>' suffix before the playlist is returned
1599 for index
, entry
in enumerate(entries
, 1):
1600 entry
['title'] += f
' #{index}'
1602 return self
.playlist_result(entries
, **info
)
# Extractor for amp.twimg.com/v/<id> pages: the VMAP URL, thumbnail and dimensions are read
# from the page's twitter:* meta tags.
# NOTE(review): parts of this class (test-case framing, return statements, dict openers) are
# not visible in this view - confirm details against the complete file.
1605 class TwitterAmplifyIE(TwitterBaseIE
):
1606 IE_NAME
= 'twitter:amplify'
# The id group matches 36 characters drawn from hex digits and '-'
1607 _VALID_URL
= r
'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
1610 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1611 'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
1613 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1615 'title': 'Twitter Video',
1616 'thumbnail': 're:^https?://.*',
1618 'params': {'format': '[protocol=https]'},
# Download the page for `url` and build the result from its meta tags
1621 def _real_extract(self
, url
):
1622 video_id
= self
._match
_id
(url
)
1623 webpage
= self
._download
_webpage
(url
, video_id
)
# Formats come from the VMAP document referenced by the 'twitter:amplify:vmap' meta tag
1625 vmap_url
= self
._html
_search
_meta
(
1626 'twitter:amplify:vmap', webpage
, 'vmap url')
1627 formats
, _
= self
._extract
_formats
_from
_vmap
_url
(vmap_url
, video_id
)
1630 thumbnail
= self
._html
_search
_meta
(
1631 'twitter:image:src', webpage
, 'thumbnail', fatal
=False)
# Helper: read the 'twitter:<target>:width' and 'twitter:<target>:height' meta values as ints
1633 def _find_dimension(target
):
1634 w
= int_or_none(self
._html
_search
_meta
(
1635 f
'twitter:{target}:width', webpage
, fatal
=False))
1636 h
= int_or_none(self
._html
_search
_meta
(
1637 f
'twitter:{target}:height', webpage
, fatal
=False))
1641 thumbnail_w
, thumbnail_h
= _find_dimension('image')
1644 'width': thumbnail_w
,
1645 'height': thumbnail_h
,
1648 video_w
, video_h
= _find_dimension('player')
1656 'title': 'Twitter Video',
1658 'thumbnails': thumbnails
,
# Extractor for <base>/i/broadcasts/<13-character id> URLs.
# Broadcast metadata is requested from 'broadcasts/show.json' and the stream source from
# 'live_video_stream/status/<media_key>'.
# NOTE(review): parts of this class (test-case framing, some conditions and return statements)
# are not visible in this view - confirm details against the complete file.
1662 class TwitterBroadcastIE(TwitterBaseIE
, PeriscopeBaseIE
):
1663 IE_NAME
= 'twitter:broadcast'
1664 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
1667 # untitled Periscope video
1668 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
1670 'id': '1yNGaQLWpejGj',
1672 'title': 'Andrea May Sahouri - Periscope Broadcast',
1673 'uploader': 'Andrea May Sahouri',
1674 'uploader_id': 'andreamsahouri',
1675 'uploader_url': 'https://twitter.com/andreamsahouri',
1676 'timestamp': 1590973638,
1677 'upload_date': '20200601',
1678 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1682 'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
1684 'id': '1ZkKzeyrPbaxv',
1686 'title': 'Starship | SN10 | High-Altitude Flight Test',
1687 'uploader': 'SpaceX',
1688 'uploader_id': 'SpaceX',
1689 'uploader_url': 'https://twitter.com/SpaceX',
1690 'timestamp': 1614812942,
1691 'upload_date': '20210303',
1692 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1696 'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
1698 'id': '1OyKAVQrgzwGb',
1700 'title': 'Starship Flight Test',
1701 'uploader': 'SpaceX',
1702 'uploader_id': 'SpaceX',
1703 'uploader_url': 'https://twitter.com/SpaceX',
1704 'timestamp': 1681993964,
1705 'upload_date': '20230420',
1706 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
# Look up the broadcast by id, fill in the info dict and attach the stream formats.
# Raises ExtractorError with 'Broadcast no longer exists' (its guarding condition is not visible here).
1711 def _real_extract(self
, url
):
1712 broadcast_id
= self
._match
_id
(url
)
1713 broadcast
= self
._call
_api
(
1714 'broadcasts/show.json', broadcast_id
,
1715 {'ids': broadcast_id
})['broadcasts'][broadcast_id
]
1717 raise ExtractorError('Broadcast no longer exists', expected
=True)
# Values from the broadcast itself take precedence over those parsed by _parse_broadcast_data
1718 info
= self
._parse
_broadcast
_data
(broadcast
, broadcast_id
)
1719 info
['title'] = broadcast
.get('status') or info
.get('title')
1720 info
['uploader_id'] = broadcast
.get('twitter_username') or info
.get('uploader_id')
1721 info
['uploader_url'] = format_field(broadcast
, 'twitter_username', 'https://twitter.com/%s', default
=None)
# NOTE(review): the body of this upcoming-broadcast branch is not visible in this view
1722 if info
['live_status'] == 'is_upcoming':
1725 media_key
= broadcast
['media_key']
1726 source
= self
._call
_api
(
1727 f
'live_video_stream/status/{media_key}', media_key
)['source']
# 'noRedirectPlaybackUrl' is preferred; otherwise 'location' is used
1728 m3u8_url
= source
.get('noRedirectPlaybackUrl') or source
['location']
1729 if '/live_video_stream/geoblocked/' in m3u8_url
:
1730 self
.raise_geo_restricted()
# The 'type' query parameter of the playlist URL (None when absent) is used as the m3u8 format id
1731 m3u8_id
= urllib
.parse
.parse_qs(urllib
.parse
.urlparse(
1732 m3u8_url
).query
).get('type', [None])[0]
1733 state
, width
, height
= self
._extract
_common
_format
_info
(broadcast
)
1734 info
['formats'] = self
._extract
_pscp
_m
3u8_formats
(
1735 m3u8_url
, broadcast_id
, m3u8_id
, state
, width
, height
)
# Extractor for <base>/i/spaces/<13-character id> URLs.
# Space data is requested through the 'AudioSpaceById' GraphQL operation and the stream source
# through 'live_video_stream/status/<media_key>'.
# NOTE(review): parts of this class (test-case framing, the header of the status mapping, the
# initialisation of `formats`, loop/condition lines) are not visible in this view - confirm
# details against the complete file.
1739 class TwitterSpacesIE(TwitterBaseIE
):
1740 IE_NAME
= 'twitter:spaces'
1741 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1744 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1746 'id': '1RDxlgyvNXzJL',
1748 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1749 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1750 'uploader': r
're:Lucio Di Gaetano.*?',
1751 'uploader_id': 'luciodigaetano',
1752 'live_status': 'was_live',
1753 'timestamp': 1659877956,
1754 'upload_date': '20220807',
1755 'release_timestamp': 1659904215,
1756 'release_date': '20220807',
1758 'skip': 'No longer available',
1760 # post_live/TimedOut but downloadable
1761 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
1763 'id': '1vAxRAVQWONJl',
1765 'title': 'Framing Up FinOps: Billing Tools',
1766 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
1767 'uploader': 'Google Cloud',
1768 'uploader_id': 'googlecloud',
1769 'live_status': 'post_live',
1770 'timestamp': 1681409554,
1771 'upload_date': '20230413',
1772 'release_timestamp': 1681839000,
1773 'release_date': '20230418',
1774 'protocol': 'm3u8', # ffmpeg is forced
1775 'container': 'm4a_dash', # audio-only format fixup is applied
1777 'params': {'skip_download': 'm3u8'},
1779 # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
1780 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
1782 'id': '1eaKbrQbjoRKX',
1785 'description': 'Twitter Space participated by nobody yet',
1786 'uploader': '息根とめる',
1787 'uploader_id': 'tomeru_ikinone',
1788 'live_status': 'was_live',
1789 'timestamp': 1685617198,
1790 'upload_date': '20230601',
1791 'protocol': 'm3u8', # ffmpeg is forced
1792 'container': 'm4a_dash', # audio-only format fixup is applied
1794 'params': {'skip_download': 'm3u8'},
1797 'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
1799 'id': '1DXGydznBYWKM',
1801 'title': 'America and Israel’s “special relationship”',
1802 'description': 'Twitter Space participated by nobody yet',
1803 'uploader': 'Candace Owens',
1804 'uploader_id': 'RealCandaceO',
1805 'live_status': 'was_live',
1806 'timestamp': 1723931351,
1807 'upload_date': '20240817',
1808 'release_timestamp': 1723932000,
1809 'release_date': '20240817',
1810 'protocol': 'm3u8_native', # not ffmpeg, detected as video space
1812 'params': {'skip_download': 'm3u8'},
# State name -> live_status value; presumably the entries of the SPACE_STATUS mapping that
# _real_extract indexes with the lower-cased metadata 'state' (mapping header not visible here)
1816 'notstarted': 'is_upcoming',
1817 'ended': 'was_live',
1818 'running': 'is_live',
1819 'timedout': 'post_live',
# Build the GraphQL query payload for `space_id`
# NOTE(review): the lines that open the payload and name its key groups are not visible in this view
1822 def _build_graphql_query(self
, space_id
):
1826 'isMetatagsQuery': True,
1827 'withDownvotePerspective': False,
1828 'withReactionsMetadata': False,
1829 'withReactionsPerspective': False,
1830 'withReplays': True,
1831 'withSuperFollowsUserFields': True,
1832 'withSuperFollowsTweetFields': True,
1835 'dont_mention_me_view_api_enabled': True,
1836 'interactive_text_enabled': True,
1837 'responsive_web_edit_tweet_api_enabled': True,
1838 'responsive_web_enhance_cards_enabled': True,
1839 'responsive_web_uc_gql_enabled': True,
1840 'spaces_2022_h2_clipping': True,
1841 'spaces_2022_h2_spaces_communities': False,
1842 'standardized_nudges_misinfo': True,
1843 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1844 'vibe_api_enabled': True,
# Fetch the Space addressed by `url`, work out its live status and collect its formats.
# Raises ExtractorError with 'Twitter Space not found' (its guarding condition is not visible here);
# the other unavailable cases are reported through self.raise_no_formats().
1848 def _real_extract(self
, url
):
1849 space_id
= self
._match
_id
(url
)
1850 space_data
= self
._call
_graphql
_api
('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id
)['audioSpace']
1852 raise ExtractorError('Twitter Space not found', expected
=True)
# live_status is looked up from the lower-cased 'state' inside try_call
1854 metadata
= space_data
['metadata']
1855 live_status
= try_call(lambda: self
.SPACE_STATUS
[metadata
['state'].lower()])
1856 is_live
= live_status
== 'is_live'
1859 headers
= {'Referer': 'https://twitter.com/'}
1860 if live_status
== 'is_upcoming':
1861 self
.raise_no_formats('Twitter Space not started yet', expected
=True)
1862 elif not is_live
and not metadata
.get('is_space_available_for_replay'):
1863 self
.raise_no_formats('Twitter Space ended and replay is disabled', expected
=True)
1864 elif metadata
.get('media_key'):
# The source URL is 'noRedirectPlaybackUrl' or 'location' of the stream status response
1865 source
= traverse_obj(
1866 self
._call
_api
(f
'live_video_stream/status/{metadata["media_key"]}', metadata
['media_key']),
1867 ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}
), get_all
=False)
# A source URL containing 'audio-space' is treated as audio-only: 'm4a' extension and the
# 'm3u8' entry protocol instead of 'mp4' and 'm3u8_native'
1868 is_audio_space
= source
and 'audio-space' in source
1869 formats
= self
._extract
_m
3u8_formats
(
1870 source
, metadata
['media_key'], 'm4a' if is_audio_space
else 'mp4',
1871 # XXX: Some audio-only Spaces need ffmpeg as downloader
1872 entry_protocol
='m3u8' if is_audio_space
else 'm3u8_native',
1873 live
=is_live
, headers
=headers
, fatal
=False) if source
else []
1876 fmt
.update({'vcodec': 'none', 'acodec': 'aac'})
1878 fmt
['container'] = 'm4a_dash'
# Speaker display names are joined for the description; 'nobody yet' is used when there are none
1880 participants
= ', '.join(traverse_obj(
1881 space_data
, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
1883 if not formats
and live_status
== 'post_live':
1884 self
.raise_no_formats('Twitter Space ended but not downloadable yet', expected
=True)
1888 'title': metadata
.get('title'),
1889 'description': f
'Twitter Space participated by {participants}',
1890 'uploader': traverse_obj(
1891 metadata
, ('creator_results', 'result', 'legacy', 'name')),
1892 'uploader_id': traverse_obj(
1893 metadata
, ('creator_results', 'result', 'legacy', 'screen_name')),
1894 'live_status': live_status
,
# 'scheduled_start' and 'created_at' are divided by 1000 via int_or_none's scale argument
1895 'release_timestamp': try_call(
1896 lambda: int_or_none(metadata
['scheduled_start'], scale
=1000)),
1897 'timestamp': int_or_none(metadata
.get('created_at'), scale
=1000),
1899 'http_headers': headers
,
# Extractor that resolves t.co short links (or 'tco:<code>' pseudo-URLs) and hands the
# resulting URL on via url_result().
1903 class TwitterShortenerIE(TwitterBaseIE
):
1904 IE_NAME
= 'twitter:shortener'
# Two alternatives: a https?://t.co/<id> link, or the 'tco:<eid>' form
1905 _VALID_URL
= r
'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
1906 _BASE_URL
= 'https://t.co/'
# Request the short link and return a url_result for the URL the response ends up at.
# NOTE(review): `eid` is unpacked below but no use of it is visible in this view; the lines
# between the unpacking and the URL construction appear to be elided - confirm against the complete file.
1908 def _real_extract(self
, url
):
1909 mobj
= self
._match
_valid
_url
(url
)
1910 eid
, shortcode
= mobj
.group('eid', 'id')
1913 url
= self
._BASE
_URL
+ shortcode
# The request is made with a 'curl' User-Agent and the response's url attribute is read
1914 new_url
= self
._request
_webpage
(url
, shortcode
, headers
={'User-Agent': 'curl'}).url
# When the result is the unsafe-link warning page, that prefix is removed to leave the target
1915 __UNSAFE_LINK
= 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
1916 if new_url
.startswith(__UNSAFE_LINK
):
1917 new_url
= new_url
.replace(__UNSAFE_LINK
, '')
1918 return self
.url_result(new_url
)