7 from .common
import InfoExtractor
8 from .periscope
import PeriscopeBaseIE
, PeriscopeIE
9 from ..networking
.exceptions
import HTTPError
32 class TwitterBaseIE(InfoExtractor
):
33 _NETRC_MACHINE
= 'twitter'
34 _API_BASE
= 'https://api.x.com/1.1/'
35 _GRAPHQL_API_BASE
= 'https://x.com/i/api/graphql/'
36 _BASE_REGEX
= r
'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
37 _AUTH
= 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
38 _LEGACY_AUTH
= 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
41 _LOGIN_INIT_DATA
= json
.dumps({
44 'debug_overrides': {},
46 'location': 'unknown',
53 'app_download_cta': 1,
54 'check_logged_in_account': 1,
55 'choice_selection': 3,
56 'contacts_live_sync_permission_prompt': 0,
58 'email_verification': 2,
68 'in_app_notification': 1,
70 'js_instrumentation': 1,
72 'notifications_permission_prompt': 2,
74 'open_home_timeline': 1,
76 'phone_verification': 4,
85 'tweet_selection_urt': 1,
88 'user_recommendations_list': 4,
89 'user_recommendations_urt': 1,
93 }, separators
=(',', ':')).encode()
95 def _extract_variant_formats(self
, variant
, video_id
):
96 variant_url
= variant
.get('url')
99 elif '.m3u8' in variant_url
:
100 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
101 variant_url
, video_id
, 'mp4', 'm3u8_native',
102 m3u8_id
='hls', fatal
=False)
103 for f
in traverse_obj(fmts
, lambda _
, v
: v
['vcodec'] == 'none' and v
.get('tbr') is None):
104 if mobj
:= re
.match(r
'hls-[Aa]udio-(?P<bitrate>\d{4,})', f
['format_id']):
105 f
['tbr'] = int_or_none(mobj
.group('bitrate'), 1000)
108 tbr
= int_or_none(dict_get(variant
, ('bitrate', 'bit_rate')), 1000) or None
111 'format_id': join_nonempty('http', tbr
),
114 self
._search
_dimensions
_in
_video
_url
(f
, variant_url
)
117 def _extract_formats_from_vmap_url(self
, vmap_url
, video_id
):
118 vmap_url
= url_or_none(vmap_url
)
121 vmap_data
= self
._download
_xml
(vmap_url
, video_id
)
125 for video_variant
in vmap_data
.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
126 video_variant
.attrib
['url'] = urllib
.parse
.unquote(
127 video_variant
.attrib
['url'])
128 urls
.append(video_variant
.attrib
['url'])
129 fmts
, subs
= self
._extract
_variant
_formats
(
130 video_variant
.attrib
, video_id
)
132 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
133 video_url
= strip_or_none(xpath_text(vmap_data
, './/MediaFile'))
134 if video_url
not in urls
:
135 fmts
, subs
= self
._extract
_variant
_formats
({'url': video_url
}, video_id
)
137 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
138 return formats
, subtitles
141 def _search_dimensions_in_video_url(a_format
, video_url
):
142 m
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
)
145 'width': int(m
.group('width')),
146 'height': int(m
.group('height')),
def is_logged_in(self):
    """Report whether an ``auth_token`` cookie exists for the API base URL.

    Looks up the cookies associated with ``self._API_BASE`` and returns
    ``True`` only when an ``auth_token`` entry is present and truthy.

    NOTE(review): other code in this file reads ``self.is_logged_in`` without
    calling it, so this is presumably wrapped by a property decorator on the
    line above, which is not visible here.
    """
    api_cookies = self._get_cookies(self._API_BASE)
    auth_cookie = api_cookies.get('auth_token')
    return bool(auth_cookie)
153 @functools.cached_property
154 def _selected_api(self
):
155 return self
._configuration
_arg
('api', ['graphql'], ie_key
='Twitter')[0]
157 def _fetch_guest_token(self
, display_id
):
158 guest_token
= traverse_obj(self
._download
_json
(
159 f
'{self._API_BASE}guest/activate.json', display_id
, 'Downloading guest token', data
=b
'',
160 headers
=self
._set
_base
_headers
(legacy
=display_id
and self
._selected
_api
== 'legacy')),
161 ('guest_token', {str}
))
163 raise ExtractorError('Could not retrieve guest token')
166 def _set_base_headers(self
, legacy
=False):
167 bearer_token
= self
._LEGACY
_AUTH
if legacy
and not self
.is_logged_in
else self
._AUTH
169 'Authorization': f
'Bearer {bearer_token}',
170 'x-csrf-token': try_call(lambda: self
._get
_cookies
(self
._API
_BASE
)['ct0'].value
),
173 def _call_login_api(self
, note
, headers
, query
={}, data
=None):
174 response
= self
._download
_json
(
175 f
'{self._API_BASE}onboarding/task.json', None, note
,
176 headers
=headers
, query
=query
, data
=data
, expected_status
=400)
177 error
= traverse_obj(response
, ('errors', 0, 'message', {str}
))
179 raise ExtractorError(f
'Login failed, Twitter API says: {error}', expected
=True)
180 elif traverse_obj(response
, 'status') != 'success':
181 raise ExtractorError('Login was unsuccessful')
183 subtask
= traverse_obj(
184 response
, ('subtasks', ..., 'subtask_id', {str}
), get_all
=False)
186 raise ExtractorError('Twitter API did not return next login subtask')
188 self
._flow
_token
= response
['flow_token']
192 def _perform_login(self
, username
, password
):
193 if self
.is_logged_in
:
196 guest_token
= self
._fetch
_guest
_token
(None)
198 **self
._set
_base
_headers
(),
199 'content-type': 'application/json',
200 'x-guest-token': guest_token
,
201 'x-twitter-client-language': 'en',
202 'x-twitter-active-user': 'yes',
203 'Referer': 'https://x.com/',
204 'Origin': 'https://x.com',
207 def build_login_json(*subtask_inputs
):
209 'flow_token': self
._flow
_token
,
210 'subtask_inputs': subtask_inputs
,
211 }, separators
=(',', ':')).encode()
213 def input_dict(subtask_id
, text
):
215 'subtask_id': subtask_id
,
222 next_subtask
= self
._call
_login
_api
(
223 'Downloading flow token', headers
, query
={'flow_name': 'login'}, data
=self
._LOGIN
_INIT
_DATA
)
225 while not self
.is_logged_in
:
226 if next_subtask
== 'LoginJsInstrumentationSubtask':
227 next_subtask
= self
._call
_login
_api
(
228 'Submitting JS instrumentation response', headers
, data
=build_login_json({
229 'subtask_id': next_subtask
,
230 'js_instrumentation': {
236 elif next_subtask
== 'LoginEnterUserIdentifierSSO':
237 next_subtask
= self
._call
_login
_api
(
238 'Submitting username', headers
, data
=build_login_json({
239 'subtask_id': next_subtask
,
241 'setting_responses': [{
242 'key': 'user_identifier',
253 elif next_subtask
== 'LoginEnterAlternateIdentifierSubtask':
254 next_subtask
= self
._call
_login
_api
(
255 'Submitting alternate identifier', headers
,
256 data
=build_login_json(input_dict(next_subtask
, self
._get
_tfa
_info
(
257 'one of username, phone number or email that was not used as --username'))))
259 elif next_subtask
== 'LoginEnterPassword':
260 next_subtask
= self
._call
_login
_api
(
261 'Submitting password', headers
, data
=build_login_json({
262 'subtask_id': next_subtask
,
264 'password': password
,
269 elif next_subtask
== 'AccountDuplicationCheck':
270 next_subtask
= self
._call
_login
_api
(
271 'Submitting account duplication check', headers
, data
=build_login_json({
272 'subtask_id': next_subtask
,
273 'check_logged_in_account': {
274 'link': 'AccountDuplicationCheck_false',
278 elif next_subtask
== 'LoginTwoFactorAuthChallenge':
279 next_subtask
= self
._call
_login
_api
(
280 'Submitting 2FA token', headers
, data
=build_login_json(input_dict(
281 next_subtask
, self
._get
_tfa
_info
('two-factor authentication token'))))
283 elif next_subtask
== 'LoginAcid':
284 next_subtask
= self
._call
_login
_api
(
285 'Submitting confirmation code', headers
, data
=build_login_json(input_dict(
286 next_subtask
, self
._get
_tfa
_info
('confirmation code sent to your email or phone'))))
288 elif next_subtask
== 'ArkoseLogin':
289 self
.raise_login_required('Twitter is requiring captcha for this login attempt', method
='cookies')
291 elif next_subtask
== 'DenyLoginSubtask':
292 self
.raise_login_required('Twitter rejected this login attempt as suspicious', method
='cookies')
294 elif next_subtask
== 'LoginSuccessSubtask':
295 raise ExtractorError('Twitter API did not grant auth token cookie')
298 raise ExtractorError(f
'Unrecognized subtask ID "{next_subtask}"')
302 def _call_api(self
, path
, video_id
, query
={}, graphql
=False):
303 headers
= self
._set
_base
_headers
(legacy
=not graphql
and self
._selected
_api
== 'legacy')
305 'x-twitter-auth-type': 'OAuth2Session',
306 'x-twitter-client-language': 'en',
307 'x-twitter-active-user': 'yes',
308 } if self
.is_logged_in
else {
309 'x-guest-token': self
._fetch
_guest
_token
(video_id
),
311 allowed_status
= {400, 401, 403, 404} if graphql
else {403}
312 result
= self
._download
_json
(
313 (self
._GRAPHQL
_API
_BASE
if graphql
else self
._API
_BASE
) + path
,
314 video_id
, headers
=headers
, query
=query
, expected_status
=allowed_status
,
315 note
=f
'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
317 if result
.get('errors'):
318 errors
= ', '.join(set(traverse_obj(result
, ('errors', ..., 'message', {str}
))))
319 if errors
and 'not authorized' in errors
:
320 self
.raise_login_required(remove_end(errors
, '.'))
321 raise ExtractorError(f
'Error(s) while querying API: {errors or "Unknown error"}')
325 def _build_graphql_query(self
, media_id
):
326 raise NotImplementedError('Method must be implemented to support GraphQL')
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL endpoint and return the ``data`` part of the response.

    The parameters produced by ``_build_graphql_query(media_id)`` are each
    serialised to compact JSON (no whitespace after separators) and sent as
    the query string through ``_call_api`` with ``graphql=True``.

    Returns whatever ``traverse_obj`` finds under the ``'data'`` key of the
    API result.
    """
    raw_params = self._build_graphql_query(media_id)
    encoded_params = {}
    for name, payload in raw_params.items():
        # Each value is JSON-encoded separately so it can travel as one query parameter
        encoded_params[name] = json.dumps(payload, separators=(',', ':'))
    response = self._call_api(endpoint, media_id, query=encoded_params, graphql=True)
    return traverse_obj(response, 'data')
334 class TwitterCardIE(InfoExtractor
):
335 IE_NAME
= 'twitter:card'
336 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
339 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
340 # MD5 checksums are different in different places
342 'id': '560070131976392705',
344 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
345 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
346 'uploader': 'Twitter',
347 'uploader_id': 'Twitter',
348 'thumbnail': r
're:^https?://.*\.jpg',
350 'timestamp': 1422366112,
351 'upload_date': '20150127',
353 'comment_count': int,
357 'display_id': '560070183650213889',
358 'uploader_url': 'https://twitter.com/Twitter',
362 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
363 'md5': '7137eca597f72b9abbe61e5ae0161399',
365 'id': '623160978427936768',
367 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
368 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
370 'uploader_id': 'NASA',
371 'timestamp': 1437408129,
372 'upload_date': '20150720',
373 'uploader_url': 'https://twitter.com/NASA',
375 'comment_count': int,
378 'tags': ['PlutoFlyby'],
380 'params': {'format': '[protocol=https]'},
383 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
384 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
388 'title': 'Ubuntu 11.10 Overview',
389 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
390 'upload_date': '20111013',
391 'uploader': 'OMG! UBUNTU!',
392 'uploader_id': 'omgubuntu',
393 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
394 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
395 'channel_follower_count': int,
396 'chapters': 'count:8',
397 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
399 'categories': ['Film & Animation'],
401 'comment_count': int,
402 'availability': 'public',
404 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
407 'channel': 'OMG! UBUNTU!',
408 'playable_in_embed': True,
410 'add_ie': ['Youtube'],
413 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
414 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
416 'id': '705235433198714880',
418 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
419 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
420 'uploader': 'Brent Yarina',
421 'uploader_id': 'BTNBrentYarina',
422 'timestamp': 1456976204,
423 'upload_date': '20160303',
425 'skip': 'This content is no longer available.',
428 'url': 'https://twitter.com/i/videos/752274308186120192',
429 'only_matching': True,
def _real_extract(self, url):
    """Hand a card/video URL over to ``TwitterIE``.

    The numeric ID matched from ``url`` is appended to
    ``https://twitter.com/statuses/`` and the resulting address is returned
    as a URL result tagged with ``TwitterIE``'s extractor key and that ID.
    """
    tweet_id = self._match_id(url)
    status_url = 'https://twitter.com/statuses/' + tweet_id
    target_ie = TwitterIE.ie_key()
    return self.url_result(status_url, target_ie, tweet_id)
440 class TwitterIE(TwitterBaseIE
):
442 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
445 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
447 'id': '643211870443208704',
448 'display_id': '643211948184596480',
450 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
451 'thumbnail': r
're:^https?://.*\.jpg',
452 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
453 'channel_id': '549749560',
454 'uploader': 'FREE THE NIPPLE',
455 'uploader_id': 'freethenipple',
457 'timestamp': 1442188653,
458 'upload_date': '20150913',
459 'uploader_url': 'https://twitter.com/freethenipple',
460 'comment_count': int,
465 '_old_archive_ids': ['twitter 643211948184596480'],
467 'skip': 'Requires authentication',
469 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
470 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
472 'id': '657991469417025536',
474 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
475 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
476 'thumbnail': r
're:^https?://.*\.png',
478 'uploader_id': 'giphz',
480 'expected_warnings': ['height', 'width'],
481 'skip': 'Account suspended',
483 'url': 'https://twitter.com/starwars/status/665052190608723968',
485 'id': '665052190608723968',
486 'display_id': '665052190608723968',
488 'title': r
're:Star Wars.*A new beginning is coming December 18.*',
489 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
490 'channel_id': '20106852',
491 'uploader_id': 'starwars',
492 'uploader': r
're:Star Wars.*',
493 'timestamp': 1447395772,
494 'upload_date': '20151113',
495 'uploader_url': 'https://twitter.com/starwars',
496 'comment_count': int,
499 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
501 '_old_archive_ids': ['twitter 665052190608723968'],
504 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
506 'id': '705235433198714880',
508 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
509 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
510 'uploader_id': 'BTNBrentYarina',
511 'uploader': 'Brent Yarina',
512 'timestamp': 1456976204,
513 'upload_date': '20160303',
514 'uploader_url': 'https://twitter.com/BTNBrentYarina',
515 'comment_count': int,
522 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
523 # Test case of TwitterCardIE
524 'skip_download': True,
526 'skip': 'Dead external link',
528 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
530 'id': '700207414000242688',
531 'display_id': '700207533655363584',
533 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
534 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
535 'thumbnail': r
're:^https?://.*\.jpg',
536 'channel_id': '1383165541',
537 'uploader': 'jaydin donte geer',
538 'uploader_id': 'jaydingeer',
540 'timestamp': 1455777459,
541 'upload_date': '20160218',
542 'uploader_url': 'https://twitter.com/jaydingeer',
543 'comment_count': int,
546 'tags': ['Damndaniel'],
548 '_old_archive_ids': ['twitter 700207533655363584'],
551 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
553 'id': '717462543795523584',
554 'display_id': '719944021058060289',
556 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
557 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
558 'channel_id': '701615052',
559 'uploader_id': 'CaptainAmerica',
560 'uploader': 'Captain America',
562 'timestamp': 1460483005,
563 'upload_date': '20160412',
564 'uploader_url': 'https://twitter.com/CaptainAmerica',
565 'thumbnail': r
're:^https?://.*\.jpg',
566 'comment_count': int,
571 '_old_archive_ids': ['twitter 719944021058060289'],
574 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
576 'id': '1zqKVVlkqLaKB',
578 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
579 'upload_date': '20160923',
580 'uploader_id': '1PmKqpJdOJQoY',
581 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
582 'timestamp': 1474613214,
583 'thumbnail': r
're:^https?://.*\.jpg',
585 'add_ie': ['Periscope'],
586 'skip': 'Broadcast not found',
588 # has mp4 formats via mobile API
589 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
591 'id': '852077943283097602',
593 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
594 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
595 'channel_id': '2526757026',
596 'uploader': 'عالم الأخبار',
597 'uploader_id': 'news_al3alm',
599 'timestamp': 1492000653,
600 'upload_date': '20170412',
601 'display_id': '852138619213144067',
603 'uploader_url': 'https://twitter.com/news_al3alm',
604 'thumbnail': r
're:^https?://.*\.jpg',
608 'comment_count': int,
609 '_old_archive_ids': ['twitter 852138619213144067'],
612 'url': 'https://twitter.com/i/web/status/910031516746514432',
614 'id': '910030238373089285',
615 'display_id': '910031516746514432',
617 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
618 'thumbnail': r
're:^https?://.*\.jpg',
619 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
620 'channel_id': '2319432498',
621 'uploader': 'Préfet de Guadeloupe',
622 'uploader_id': 'Prefet971',
624 'timestamp': 1505803395,
625 'upload_date': '20170919',
626 'uploader_url': 'https://twitter.com/Prefet971',
627 'comment_count': int,
632 '_old_archive_ids': ['twitter 910031516746514432'],
635 'skip_download': True, # requires ffmpeg
638 # card via api.twitter.com/1.1/videos/tweet/config
639 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
641 'id': '1001551417340022785',
642 'display_id': '1001551623938805763',
644 'title': 're:.*?Shep is on a roll today.*?',
645 'thumbnail': r
're:^https?://.*\.jpg',
646 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
647 'channel_id': '255036353',
648 'uploader': 'Lis Power',
649 'uploader_id': 'LisPower1',
651 'timestamp': 1527623489,
652 'upload_date': '20180529',
653 'uploader_url': 'https://twitter.com/LisPower1',
654 'comment_count': int,
659 '_old_archive_ids': ['twitter 1001551623938805763'],
662 'skip_download': True, # requires ffmpeg
665 'url': 'https://twitter.com/foobar/status/1087791357756956680',
667 'id': '1087791272830607360',
668 'display_id': '1087791357756956680',
670 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
671 'thumbnail': r
're:^https?://.*\.jpg',
672 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
676 'timestamp': 1548184644,
677 'upload_date': '20190122',
678 'uploader_url': 'https://twitter.com/X',
679 'comment_count': int,
686 'skip': 'This Tweet is unavailable',
688 # not available in Periscope
689 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
691 'id': '1vOGwqejwoWxB',
693 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
695 'uploader_id': '1eVjYOLGkGrQL',
696 'thumbnail': r
're:^https?://.*\.jpg',
697 'tags': ['EduTECH2019'],
700 'add_ie': ['TwitterBroadcast'],
701 'skip': 'Broadcast no longer exists',
704 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
706 'id': '1349774757969989634',
707 'display_id': '1349794411333394432',
709 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
710 'thumbnail': r
're:^https?://.*\.jpg',
711 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
712 'channel_id': '18552281',
713 'uploader': 'Brooklyn Nets',
714 'uploader_id': 'BrooklynNets',
716 'timestamp': 1610651040,
717 'upload_date': '20210114',
718 'uploader_url': 'https://twitter.com/BrooklynNets',
719 'comment_count': int,
724 '_old_archive_ids': ['twitter 1349794411333394432'],
727 'skip_download': True,
730 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
732 'id': '1577855447914409984',
733 'display_id': '1577855540407197696',
735 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
736 'description': 'md5:b9c3699335447391d11753ab21c70a74',
737 'upload_date': '20221006',
738 'channel_id': '143077138',
739 'uploader': 'Oshtru',
740 'uploader_id': 'oshtru',
741 'uploader_url': 'https://twitter.com/oshtru',
742 'thumbnail': r
're:^https?://.*\.jpg',
744 'timestamp': 1665025050,
745 'comment_count': int,
750 '_old_archive_ids': ['twitter 1577855540407197696'],
752 'params': {'skip_download': True},
754 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
756 'id': '1577719286659006464',
757 'title': 'Ultima Reload - Test',
758 'description': 'Test https://t.co/Y3KEZD7Dad',
759 'channel_id': '168922496',
760 'uploader': 'Ultima Reload',
761 'uploader_id': 'UltimaShadowX',
762 'uploader_url': 'https://twitter.com/UltimaShadowX',
763 'upload_date': '20221005',
764 'timestamp': 1664992565,
765 'comment_count': int,
772 'params': {'skip_download': True},
774 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
776 'id': '1575559336759263233',
777 'display_id': '1575560063510810624',
779 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
780 'thumbnail': r
're:^https?://.*\.jpg',
781 'description': 'md5:95aea692fda36a12081b9629b02daa92',
782 'channel_id': '1094109584',
783 'uploader': 'Max Olson',
784 'uploader_id': 'MesoMax919',
785 'uploader_url': 'https://twitter.com/MesoMax919',
787 'timestamp': 1664477766,
788 'upload_date': '20220929',
789 'comment_count': int,
792 'tags': ['HurricaneIan'],
794 '_old_archive_ids': ['twitter 1575560063510810624'],
797 # Adult content, fails if not logged in
798 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
800 'id': '1575199163847000068',
801 'display_id': '1575199173472927762',
805 'channel_id': '1217167793541480450',
807 'uploader_id': 'Rizdraws',
808 'uploader_url': 'https://twitter.com/Rizdraws',
809 'upload_date': '20220928',
810 'timestamp': 1664391723,
811 'thumbnail': r
're:^https?://.+\.jpg',
814 'comment_count': int,
817 '_old_archive_ids': ['twitter 1575199173472927762'],
819 'params': {'skip_download': 'The media could not be played'},
820 'skip': 'Requires authentication',
822 # Playlist result only with graphql API
823 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
824 'playlist_mincount': 2,
826 'id': '1395079556562706435',
829 'channel_id': '21539378',
832 'upload_date': '20210519',
835 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
836 'uploader_id': 'Srirachachau',
837 'comment_count': int,
838 'uploader_url': 'https://twitter.com/Srirachachau',
839 'timestamp': 1621447860,
842 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
843 'playlist_mincount': 2,
845 'id': '1578353380363501568',
847 'channel_id': '2195866214',
848 'uploader_id': 'DavidToons_',
852 'timestamp': 1665143744,
853 'uploader_url': 'https://twitter.com/DavidToons_',
854 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
856 'comment_count': int,
857 'upload_date': '20221007',
861 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
864 'id': '1578401165338976258',
866 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
867 'channel_id': '19338359',
869 'uploader_id': 'primevideouk',
870 'timestamp': 1665155137,
871 'upload_date': '20221007',
873 'uploader_url': 'https://twitter.com/primevideouk',
874 'comment_count': int,
877 'tags': ['TheRingsOfPower'],
881 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
883 'id': '1lPJqmBeeNAJb',
885 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
886 'uploader': r
're:Monique Camarra.+?',
887 'uploader_id': 'MoniqueCamarra',
888 'live_status': 'was_live',
889 'release_timestamp': 1658417414,
890 'description': r
're:Twitter Space participated by Sergej Sumlenny.+',
891 'timestamp': 1658407771,
892 'release_date': '20220721',
893 'upload_date': '20220721',
895 'add_ie': ['TwitterSpaces'],
896 'params': {'skip_download': 'm3u8'},
898 # URL specifies video number but --yes-playlist
899 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
900 'playlist_mincount': 2,
902 'id': '1600649710662213632',
903 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
904 'timestamp': 1670459604.0,
905 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
906 'comment_count': int,
907 'uploader_id': 'CTVJLaidlaw',
908 'channel_id': '80082014',
910 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
911 'upload_date': '20221208',
913 'uploader': 'Jocelyn Laidlaw',
914 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
918 # URL specifies video number and --no-playlist
919 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
921 'id': '1600649511827013632',
923 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
924 'thumbnail': r
're:^https?://.+\.jpg',
925 'timestamp': 1670459604.0,
926 'channel_id': '80082014',
927 'uploader_id': 'CTVJLaidlaw',
928 'uploader': 'Jocelyn Laidlaw',
930 'comment_count': int,
931 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
933 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
934 'display_id': '1600649710662213632',
936 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
937 'upload_date': '20221208',
939 '_old_archive_ids': ['twitter 1600649710662213632'],
941 'params': {'noplaylist': True},
943 # The ID points to a TweetWithVisibilityResults entity, which wraps the actual Tweet
944 # Note that the extracted ID differs from the ID in the URL
945 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
947 'id': '1621117577354424321',
948 'display_id': '1621117700482416640',
950 'title': '뽀 - 아 최우제 이동속도 봐',
951 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
953 'channel_id': '1281839411068432384',
955 'uploader_id': 's2FAKER',
956 'uploader_url': 'https://twitter.com/s2FAKER',
957 'upload_date': '20230202',
958 'timestamp': 1675339553.0,
959 'thumbnail': r
're:https?://pbs\.twimg\.com/.+',
964 'comment_count': int,
965 '_old_archive_ids': ['twitter 1621117700482416640'],
967 'skip': 'Requires authentication',
969 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
971 'id': '1599108643743473680',
972 'display_id': '1599108751385972737',
974 'title': '\u06ea - \U0001F48B',
975 'channel_id': '1347791436809441283',
976 'uploader_url': 'https://twitter.com/hlo_again',
978 'uploader_id': 'hlo_again',
979 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
982 'comment_count': int,
983 'upload_date': '20221203',
985 'timestamp': 1670092210.0,
987 'uploader': '\u06ea',
988 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
989 '_old_archive_ids': ['twitter 1599108751385972737'],
991 'params': {'noplaylist': True},
993 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
995 'id': '1600009362759733248',
996 'display_id': '1600009574919962625',
998 'channel_id': '211814412',
999 'uploader_url': 'https://twitter.com/MunTheShinobi',
1000 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1001 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1004 'repost_count': int,
1005 'upload_date': '20221206',
1006 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1007 'comment_count': int,
1010 'uploader_id': 'MunTheShinobi',
1011 'duration': 139.987,
1012 'timestamp': 1670306984.0,
1013 '_old_archive_ids': ['twitter 1600009574919962625'],
1016 # retweeted_status (private)
1017 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1019 'id': '1623274794488659969',
1020 'display_id': '1623739803874349067',
1022 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1023 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1024 'uploader': 'Johnny Bullets',
1025 'uploader_id': 'Johnnybull3ts',
1026 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1030 'timestamp': 1675853859.0,
1031 'upload_date': '20230208',
1032 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1034 'repost_count': int,
1036 'skip': 'Protected tweet',
1039 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1041 'id': '1694928337846538240',
1043 'display_id': '1695424220702888009',
1044 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1045 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1046 'channel_id': '15212187',
1047 'uploader': 'Benny Johnson',
1048 'uploader_id': 'bennyjohnson',
1049 'uploader_url': 'https://twitter.com/bennyjohnson',
1053 'timestamp': 1692962814.0,
1054 'upload_date': '20230825',
1055 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1057 'repost_count': int,
1058 'comment_count': int,
1059 '_old_archive_ids': ['twitter 1695424220702888009'],
1062 # retweeted_status w/ legacy API
1063 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1065 'id': '1694928337846538240',
1067 'display_id': '1695424220702888009',
1068 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1069 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1070 'channel_id': '15212187',
1071 'uploader': 'Benny Johnson',
1072 'uploader_id': 'bennyjohnson',
1073 'uploader_url': 'https://twitter.com/bennyjohnson',
1077 'timestamp': 1692962814.0,
1078 'upload_date': '20230825',
1079 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1081 'repost_count': int,
1082 '_old_archive_ids': ['twitter 1695424220702888009'],
1084 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1086 # Broadcast embedded in tweet
1087 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1089 'id': '1rmxPMjLzAXKN',
1091 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1092 'uploader': 'Jessica Dobson',
1093 'uploader_id': 'JessicaDobsonWX',
1094 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1095 'timestamp': 1701566398,
1096 'upload_date': '20231203',
1097 'live_status': 'was_live',
1098 'thumbnail': r
're:https://[^/]+pscp\.tv/.+\.jpg',
1099 'concurrent_view_count': int,
1102 'add_ie': ['TwitterBroadcast'],
1104 # Animated gif and quote tweet video
1105 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1106 'playlist_mincount': 2,
1108 'id': '1696256659889565950',
1109 'title': 'BAKOON - https://t.co/zom968d0a0',
1110 'description': 'https://t.co/zom968d0a0',
1112 'channel_id': '1263540390',
1113 'uploader': 'BAKOON',
1114 'uploader_id': 'BAKKOOONN',
1115 'uploader_url': 'https://twitter.com/BAKKOOONN',
1117 'timestamp': 1693254077.0,
1118 'upload_date': '20230828',
1120 'comment_count': int,
1121 'repost_count': int,
1123 'skip': 'Requires authentication',
1125 # "stale tweet" with typename "TweetWithVisibilityResults"
1126 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1127 'md5': '511377ff8dfa7545307084dca4dce319',
1129 'id': '1724883339285544960',
1131 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1132 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1133 'display_id': '1724884212803834154',
1134 'channel_id': '337808606',
1135 'uploader': 'Robert F. Kennedy Jr',
1136 'uploader_id': 'RobertKennedyJr',
1137 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1138 'upload_date': '20231115',
1139 'timestamp': 1700079417.0,
1140 'duration': 341.048,
1141 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1142 'tags': ['Kennedy24'],
1143 'repost_count': int,
1145 'comment_count': int,
1147 '_old_archive_ids': ['twitter 1724884212803834154'],
1151 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1152 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1154 'id': '1790637589910654976',
1156 'title': 'Historic Vids - One of the most intense moments in history',
1157 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1158 'display_id': '1790637656616943991',
1159 'uploader': 'Historic Vids',
1160 'uploader_id': 'historyinmemes',
1161 'uploader_url': 'https://twitter.com/historyinmemes',
1162 'channel_id': '855481986290524160',
1163 'upload_date': '20240515',
1164 'timestamp': 1715756260.0,
1167 'comment_count': int,
1168 'repost_count': int,
1170 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1172 '_old_archive_ids': ['twitter 1790637656616943991'],
1176 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1177 'only_matching': True,
1180 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1181 'only_matching': True,
1183 # promo_video_website card
1184 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1185 'only_matching': True,
1187 # promo_video_convo card
1188 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1189 'only_matching': True,
1192 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1193 'only_matching': True,
1195 # video_direct_message card
1196 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1197 'only_matching': True,
1199 # poll2choice_video card
1200 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1201 'only_matching': True,
1203 # poll3choice_video card
1204 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1205 'only_matching': True,
1207 # poll4choice_video card
1208 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1209 'only_matching': True,
# Captures the run of digits that directly follows "_video/" in a URL.
# _call_syndication_api searches video variant URLs with this pattern and
# uses capture group 1 as the media's `id_str`.
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
def _GRAPHQL_ENDPOINT(self):
    """Return the GraphQL endpoint string used to look up a single tweet.

    Logged-in sessions use the ``TweetDetail`` operation; every other
    session uses ``TweetResultByRestId``.
    """
    # NOTE(review): _extract_status reads this as `self._GRAPHQL_ENDPOINT`
    # without calling it, so it is presumably wrapped as a property where
    # it is attached to the class -- confirm.
    if self.is_logged_in:
        return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
    return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into the legacy status layout.

    The tweet result is looked up in ``data`` along a path that depends on
    ``self.is_logged_in``. Tombstoned or unavailable tweets raise; otherwise
    the result's ``legacy`` dict is returned after the user, card, quoted
    status and retweeted status have been merged into it.

    ``twid`` is the tweet ID; it selects the ``tweet-{twid}`` entry in the
    logged-in response and is passed along with the unknown-typename warning.
    """
    result = traverse_obj(data, (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None), {dict},
    ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
        data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
        # Unrecognised result types are reported but still processed below
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
    elif typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        # Any other reason (or none at all) is surfaced as-is
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
    # Result for "stale tweet" needs additional transformation
    elif typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    status.update(traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={}))

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        status['retweeted_status']['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Flatten the card's list of {key, value} items into a key -> value mapping,
    # which is the shape that card extraction indexes by name
    binding_values = {
        binding_value.get('key'): binding_value.get('value')
        for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for the tweet identified by ``media_id``.

    Two different payloads exist, selected by ``self.is_logged_in``: the
    logged-in one addresses the tweet as ``focalTweetId``, the other as
    ``tweetId`` and additionally carries ``fieldToggles``.
    """
    if self.is_logged_in:
        return {
            'variables': {
                'focalTweetId': media_id,
                'includePromotedContent': True,
                'with_rux_injections': False,
                'withBirdwatchNotes': True,
                'withCommunity': True,
                'withDownvotePerspective': False,
                'withQuickPromoteEligibilityTweetFields': True,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withSuperFollowsTweetFields': True,
                'withSuperFollowsUserFields': True,
                'withV2Timeline': True,
                # NOTE(review): this entry is missing from the extracted text and was restored -- confirm
                'withVoice': True,
            },
            'features': {
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_graphql_timeline_navigation_enabled': False,
                'responsive_web_text_conversations_enabled': False,
                'responsive_web_uc_gql_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'tweetypie_unmention_optimization_enabled': True,
                'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
                'verified_phone_label_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    return {
        'variables': {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            # NOTE(review): this entry is missing from the extracted text and was restored -- confirm
            'withVoice': False,
        },
        'features': {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        },
        'fieldToggles': {
            'withArticleRichContentState': False,
        },
    }
def _call_syndication_api(self, twid):
    """Fetch tweet ``twid`` from the syndication endpoint.

    The JSON response is reshaped so that its media entries sit under
    ``extended_entities`` -> ``media``, matching the legacy/GraphQL layout.
    A warning is emitted first because this endpoint does not expose all
    metadata or media.

    Raises ExtractorError when the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={
            'id': twid,
            # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
            # Until that is implemented, a random 10-character token is sent instead
            'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
        })
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')
    # Transform the result so its structure matches that of legacy/graphql
    media = []
    for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
        # Take the media ID from the first variant URL that matches
        # _MEDIA_ID_RE; fall back to the tweet ID when none does
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        media.append(detail)
    status['extended_entities'] = {'media': media}

    return status
def _extract_status(self, twid):
    """Return the status dict for tweet ``twid`` using the selected API.

    GraphQL is used when logged in or when it is the selected API, the
    legacy REST endpoint when ``legacy`` is selected, and the syndication
    endpoint when ``syndication`` is selected or when one of the former
    answers with HTTP 429. When the status carries a ``retweeted_status``
    dict, that dict is returned instead of the outer status.

    Raises ExtractorError for an unknown API selection.
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or self._selected_api == 'graphql':
            status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
        elif self._selected_api == 'legacy':
            status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                # NOTE(review): this entry is missing from the extracted text and was restored -- confirm
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            })
    except ExtractorError as e:
        # Only a rate-limit response triggers the fallback; everything else propagates
        if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet at ``url``.

    Metadata shared by all entries is collected from the status first.
    Media entries come from ``extended_entities`` of the tweet and of its
    quoted tweet, plus the tweet's card. Depending on the playlist choice
    and the index given in the URL, the result is a single entry, a
    playlist, or a redirect to the first expanded URL of the tweet when no
    media was found.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build an entry (formats, subtitles, thumbnails) from one media dict."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                # The size variant is selected through the `name` query parameter
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries (or URL results) described by the tweet's card, if any."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is typed; its payload lives under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The URL index counts every media item (photos included), starting at 1
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media at all: hand over to whatever the tweet links to, if anything
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1566 class TwitterAmplifyIE(TwitterBaseIE
):
1567 IE_NAME
= 'twitter:amplify'
1568 _VALID_URL
= r
'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
1571 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1572 'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
1574 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1576 'title': 'Twitter Video',
1577 'thumbnail': 're:^https?://.*',
1579 'params': {'format': '[protocol=https]'},
def _real_extract(self, url):
    """Extract a video from an ``amp.twimg.com`` page.

    The formats come from the VMAP URL announced in the page's
    ``twitter:amplify:vmap`` meta tag; the thumbnail and the image/player
    dimensions are read from the page's other ``twitter:*`` meta tags.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    vmap_url = self._html_search_meta(
        'twitter:amplify:vmap', webpage, 'vmap url')
    formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

    thumbnails = []
    thumbnail = self._html_search_meta(
        'twitter:image:src', webpage, 'thumbnail', fatal=False)

    def _find_dimension(target):
        # Reads twitter:<target>:width / :height; either may come back as None
        w = int_or_none(self._html_search_meta(
            f'twitter:{target}:width', webpage, fatal=False))
        h = int_or_none(self._html_search_meta(
            f'twitter:{target}:height', webpage, fatal=False))
        return w, h

    if thumbnail:
        thumbnail_w, thumbnail_h = _find_dimension('image')
        thumbnails.append({
            'url': thumbnail,
            'width': thumbnail_w,
            'height': thumbnail_h,
        })

    video_w, video_h = _find_dimension('player')
    # Guarded so that an empty format list does not raise IndexError here
    if formats:
        formats[0].update({
            'width': video_w,
            'height': video_h,
        })

    return {
        'id': video_id,
        'title': 'Twitter Video',
        'formats': formats,
        'thumbnails': thumbnails,
    }
1623 class TwitterBroadcastIE(TwitterBaseIE
, PeriscopeBaseIE
):
1624 IE_NAME
= 'twitter:broadcast'
1625 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
1628 # untitled Periscope video
1629 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
1631 'id': '1yNGaQLWpejGj',
1633 'title': 'Andrea May Sahouri - Periscope Broadcast',
1634 'uploader': 'Andrea May Sahouri',
1635 'uploader_id': 'andreamsahouri',
1636 'uploader_url': 'https://twitter.com/andreamsahouri',
1637 'timestamp': 1590973638,
1638 'upload_date': '20200601',
1639 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1643 'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
1645 'id': '1ZkKzeyrPbaxv',
1647 'title': 'Starship | SN10 | High-Altitude Flight Test',
1648 'uploader': 'SpaceX',
1649 'uploader_id': 'SpaceX',
1650 'uploader_url': 'https://twitter.com/SpaceX',
1651 'timestamp': 1614812942,
1652 'upload_date': '20210303',
1653 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1657 'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
1659 'id': '1OyKAVQrgzwGb',
1661 'title': 'Starship Flight Test',
1662 'uploader': 'SpaceX',
1663 'uploader_id': 'SpaceX',
1664 'uploader_url': 'https://twitter.com/SpaceX',
1665 'timestamp': 1681993964,
1666 'upload_date': '20230420',
1667 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
def _real_extract(self, url):
    """Extract a Twitter broadcast.

    Broadcast metadata is fetched from ``broadcasts/show.json`` and parsed
    with ``_parse_broadcast_data``; title and uploader fields are then
    overridden from the Twitter-specific keys. Upcoming broadcasts are
    returned without formats. Otherwise the stream URL is fetched from
    ``live_video_stream/status/<media_key>`` and expanded into formats.

    Raises ExtractorError when the API returns no data for the broadcast.
    """
    broadcast_id = self._match_id(url)
    broadcast = self._call_api(
        'broadcasts/show.json', broadcast_id,
        {'ids': broadcast_id})['broadcasts'][broadcast_id]
    if not broadcast:
        raise ExtractorError('Broadcast no longer exists', expected=True)
    info = self._parse_broadcast_data(broadcast, broadcast_id)
    info['title'] = broadcast.get('status') or info.get('title')
    info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
    info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
    if info['live_status'] == 'is_upcoming':
        return info

    media_key = broadcast['media_key']
    source = self._call_api(
        f'live_video_stream/status/{media_key}', media_key)['source']
    m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
    if '/live_video_stream/geoblocked/' in m3u8_url:
        self.raise_geo_restricted()
    # The `type` query parameter of the playlist URL (if any) names the format group
    m3u8_id = urllib.parse.parse_qs(urllib.parse.urlparse(
        m3u8_url).query).get('type', [None])[0]
    state, width, height = self._extract_common_format_info(broadcast)
    info['formats'] = self._extract_pscp_m3u8_formats(
        m3u8_url, broadcast_id, m3u8_id, state, width, height)
    return info
1700 class TwitterSpacesIE(TwitterBaseIE
):
1701 IE_NAME
= 'twitter:spaces'
1702 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1705 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1707 'id': '1RDxlgyvNXzJL',
1709 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1710 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1711 'uploader': r
're:Lucio Di Gaetano.*?',
1712 'uploader_id': 'luciodigaetano',
1713 'live_status': 'was_live',
1714 'timestamp': 1659877956,
1715 'upload_date': '20220807',
1716 'release_timestamp': 1659904215,
1717 'release_date': '20220807',
1719 'skip': 'No longer available',
1721 # post_live/TimedOut but downloadable
1722 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
1724 'id': '1vAxRAVQWONJl',
1726 'title': 'Framing Up FinOps: Billing Tools',
1727 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
1728 'uploader': 'Google Cloud',
1729 'uploader_id': 'googlecloud',
1730 'live_status': 'post_live',
1731 'timestamp': 1681409554,
1732 'upload_date': '20230413',
1733 'release_timestamp': 1681839000,
1734 'release_date': '20230418',
1735 'protocol': 'm3u8', # ffmpeg is forced
1736 'container': 'm4a_dash', # audio-only format fixup is applied
1738 'params': {'skip_download': 'm3u8'},
1740 # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
1741 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
1743 'id': '1eaKbrQbjoRKX',
1746 'description': 'Twitter Space participated by nobody yet',
1747 'uploader': '息根とめる',
1748 'uploader_id': 'tomeru_ikinone',
1749 'live_status': 'was_live',
1750 'timestamp': 1685617198,
1751 'upload_date': '20230601',
1752 'protocol': 'm3u8', # ffmpeg is forced
1753 'container': 'm4a_dash', # audio-only format fixup is applied
1755 'params': {'skip_download': 'm3u8'},
1758 'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
1760 'id': '1DXGydznBYWKM',
1762 'title': 'America and Israel’s “special relationship”',
1763 'description': 'Twitter Space participated by nobody yet',
1764 'uploader': 'Candace Owens',
1765 'uploader_id': 'RealCandaceO',
1766 'live_status': 'was_live',
1767 'timestamp': 1723931351,
1768 'upload_date': '20240817',
1769 'release_timestamp': 1723932000,
1770 'release_date': '20240817',
1771 'protocol': 'm3u8_native', # not ffmpeg, detected as video space
1773 'params': {'skip_download': 'm3u8'},
1777 'notstarted': 'is_upcoming',
1778 'ended': 'was_live',
1779 'running': 'is_live',
1780 'timedout': 'post_live',
def _build_graphql_query(self, space_id):
    """Build the GraphQL query payload for the Space identified by ``space_id``."""
    return {
        'variables': {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        },
        'features': {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        },
    }
def _real_extract(self, url):
    """Extract a Twitter Space.

    The Space is looked up through the ``AudioSpaceById`` GraphQL query.
    Its ``state`` is mapped to a live status via ``SPACE_STATUS``. Formats
    are only requested when the Space is live or available for replay and
    has a ``media_key``; in the remaining cases a "no formats" error is
    reported and the metadata is still returned.

    Raises ExtractorError when the API returns no Space data.
    """
    space_id = self._match_id(url)
    space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
    if not space_data:
        raise ExtractorError('Twitter Space not found', expected=True)

    metadata = space_data['metadata']
    # Unknown or missing states leave live_status as None
    live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
    is_live = live_status == 'is_live'

    formats = []
    headers = {'Referer': 'https://twitter.com/'}
    if live_status == 'is_upcoming':
        self.raise_no_formats('Twitter Space not started yet', expected=True)
    elif not is_live and not metadata.get('is_space_available_for_replay'):
        self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
    elif metadata.get('media_key'):
        source = traverse_obj(
            self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
            ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
        is_audio_space = source and 'audio-space' in source
        formats = self._extract_m3u8_formats(
            source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
            # XXX: Some audio-only Spaces need ffmpeg as downloader
            entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
            live=is_live, headers=headers, fatal=False) if source else []
        if is_audio_space:
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})
                if not is_live:
                    fmt['container'] = 'm4a_dash'

    participants = ', '.join(traverse_obj(
        space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'

    if not formats and live_status == 'post_live':
        self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

    return {
        'id': space_id,
        'title': metadata.get('title'),
        'description': f'Twitter Space participated by {participants}',
        'uploader': traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'name')),
        'uploader_id': traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
        'live_status': live_status,
        'release_timestamp': try_call(
            lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
        'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
        'formats': formats,
        'http_headers': headers,
    }
# Resolves t.co short links, given either as a full https://t.co/<code> URL
# or in the `tco:<code>` form, to the URL they redirect to.
class TwitterShortenerIE(TwitterBaseIE):
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever it points at."""
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # `tco:<code>` form: build the real t.co URL from the code
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url
        # A redirect to the interstitial warning page carries the real target
        # after this prefix; only a leading occurrence is stripped
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)