1 from .common
import InfoExtractor
15 from ..utils
.traversal
import traverse_obj
18 class BlueskyIE(InfoExtractor
):
20 r
'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
21 r
'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
24 'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
25 'md5': '375539c1930ab05d15585ed772ab54fd',
27 'id': '3l4omssdl632g',
29 'uploader': 'Blu3Blu3Lilith',
30 'uploader_id': 'blu3blue.bsky.social',
31 'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
32 'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
33 'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
34 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
35 'title': 'OMG WE HAVE VIDEOS NOW',
36 'description': 'OMG WE HAVE VIDEOS NOW',
37 'upload_date': '20240921',
38 'timestamp': 1726940605,
45 'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
46 'md5': 'b9e344fdbce9f2852c668a97efefb105',
48 'id': '3l3vgf77uco2g',
50 'uploader': 'Bluesky',
51 'uploader_id': 'bsky.app',
52 'uploader_url': 'https://bsky.app/profile/bsky.app',
53 'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
54 'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
55 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
56 'title': 'Bluesky now has video! Update your app to versi...',
57 'alt_title': 'Bluesky video feature announcement',
58 'description': r
're:(?s)Bluesky now has video! .{239}',
59 'upload_date': '20240911',
60 'timestamp': 1726074716,
70 'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
71 'md5': '5f2df8c200b5633eb7fb2c984d29772f',
73 'id': '3l4qhp7bcs52c',
76 'uploader_id': 'souris.moe',
77 'uploader_url': 'https://bsky.app/profile/souris.moe',
78 'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
79 'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
80 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
81 'title': 'Bluesky video #3l4qhp7bcs52c',
82 'upload_date': '20240922',
83 'timestamp': 1727003838,
90 'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
91 'md5': 'cc0110ed1f6b0247caac8234cc1e861d',
93 'id': '3l3w4tnezek2e',
96 'uploader_id': 'de1.pds.tentacle.expert',
97 'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
98 'channel_id': 'did:web:de1.tentacle.expert',
99 'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
100 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
101 'title': 'Bluesky video #3l3w4tnezek2e',
102 'upload_date': '20240911',
103 'timestamp': 1726098823,
106 'comment_count': int,
110 'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
114 'uploader': 'yunayu',
115 'uploader_id': '@yunayuispink',
116 'uploader_url': 'https://www.youtube.com/@yunayuispink',
118 'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
119 'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
120 'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
121 'description': r
're:Have a good goodx10000day',
122 'title': '5min vs 5hours drawing',
123 'availability': 'public',
124 'live_status': 'not_live',
125 'playable_in_embed': True,
126 'upload_date': '20241026',
127 'timestamp': 1729967784,
132 'comment_count': int,
133 'channel_follower_count': int,
134 'categories': ['Entertainment'],
137 'heatmap': 'count:100',
139 'add_ie': ['Youtube'],
141 'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
145 'uploader': 'LASERBAT',
146 'uploader_id': 'laserbatx',
147 'uploader_url': 'https://laserbatx.bandcamp.com',
148 'artists': ['LASERBAT'],
149 'album_artists': ['LASERBAT'],
150 'album': 'Hari Nezumi [EP]',
151 'track': 'Forward to the End',
152 'title': 'LASERBAT - Forward to the End',
153 'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
155 'track_id': '222792849',
156 'release_date': '20230423',
157 'upload_date': '20230423',
158 'timestamp': 1682276040.0,
159 'release_timestamp': 1682276040.0,
162 'add_ie': ['Bandcamp'],
164 'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
165 'md5': 'b9e344fdbce9f2852c668a97efefb105',
167 'id': '3l3vgf77uco2g',
169 'uploader': 'Bluesky',
170 'uploader_id': 'bsky.app',
171 'uploader_url': 'https://bsky.app/profile/bsky.app',
172 'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
173 'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
174 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
175 'title': 'Bluesky now has video! Update your app to versi...',
176 'alt_title': 'Bluesky video feature announcement',
177 'description': r
're:(?s)Bluesky now has video! .{239}',
178 'upload_date': '20240911',
179 'timestamp': 1726074716,
182 'comment_count': int,
189 'url': 'https://bsky.app/profile/cinny.bun.how/post/3l7rdfxhyds2f',
190 'md5': '8775118b235cf9fa6b5ad30f95cda75c',
192 'id': '3l7rdfxhyds2f',
194 'uploader': 'cinnamon',
195 'uploader_id': 'cinny.bun.how',
196 'uploader_url': 'https://bsky.app/profile/cinny.bun.how',
197 'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
198 'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
199 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
200 'title': 'crazy that i look like this tbh',
201 'description': 'crazy that i look like this tbh',
202 'upload_date': '20241030',
203 'timestamp': 1730332128,
206 'comment_count': int,
211 'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
212 'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
214 'id': '3l6zrz6zyl2dr',
217 'uploader_id': 'mary.my.id',
218 'uploader_url': 'https://bsky.app/profile/mary.my.id',
219 'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
220 'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
221 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
222 'title': 'Bluesky video #3l6zrz6zyl2dr',
223 'upload_date': '20241021',
224 'timestamp': 1729523172,
227 'comment_count': int,
231 'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
233 'id': '3l7gv55dc2o2w',
237 'id': '3l7gv55dc2o2w',
239 'upload_date': '20241026',
240 'description': 'One of my favorite videos',
241 'comment_count': int,
242 'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
243 'uploader': 'Purple.Ice.Tea',
244 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
245 'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
247 'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
249 'timestamp': 1729973202,
251 'uploader_id': 'purpleicetea.bsky.social',
252 'title': 'One of my favorite videos',
256 'id': '3l77u64l7le2e',
258 'title': 'hearing people on twitter say that bluesky isn\'...',
260 'uploader_id': 'thafnine.net',
261 'uploader_url': 'https://bsky.app/profile/thafnine.net',
262 'upload_date': '20241024',
263 'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
264 'description': r
're:(?s)hearing people on twitter say that bluesky .{93}',
266 'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
268 'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
269 'thumbnail': r
're:https://video.bsky.app/watch/.*\.jpg$',
270 'timestamp': 1729731642,
271 'comment_count': int,
# URL template for the com.atproto.sync.getBlob XRPC method. The `{}` slot is
# filled with a service endpoint via `.format(endpoint)`, and the `did` / `cid`
# values are appended as query parameters by the code that builds format and
# subtitle URLs in `_extract_videos`.
276 _BLOB_URL_TMPL
= '{}/xrpc/com.atproto.sync.getBlob'
def _get_service_endpoint(self, did, video_id):
    """Return the service endpoint URL advertised for ``did``.

    ``did:web:`` identifiers are looked up at
    ``https://<rest-of-did>/.well-known/did.json``; every other DID is
    looked up at ``https://plc.directory/<did>``. The download is made
    with ``fatal=False``, so a failed lookup does not abort extraction.

    :param did: DID string to resolve.
    :param video_id: ID handed to ``_download_json`` together with the
        progress/error notes.
    :return: the ``serviceEndpoint`` of a ``service`` entry whose ``type``
        is ``AtprotoPersonalDataServer`` (filtered through
        ``url_or_none``), or ``'https://bsky.social'`` when the lookup
        yields nothing usable.
    """
    web_prefix = 'did:web:'
    if did.startswith(web_prefix):
        did_doc_url = f'https://{did[len(web_prefix):]}/.well-known/did.json'
    else:
        # Anything that is not did:web is resolved through plc.directory
        did_doc_url = f'https://plc.directory/{did}'

    did_doc = self._download_json(
        did_doc_url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)

    pds_endpoint_path = (
        'service', lambda _, service: service['type'] == 'AtprotoPersonalDataServer',
        'serviceEndpoint', {url_or_none}, any)
    # An empty/failed lookup makes the traversal falsy, hence the default host
    return traverse_obj(did_doc, pds_endpoint_path) or 'https://bsky.social'
# Request a post from the public Bluesky API: calls self._download_json on the
# app.bsky.feed.getPostThread XRPC endpoint, with a `uri` entry built as
# at://{handle}/app.bsky.feed.post/{post_id}.
# NOTE(review): the remaining call arguments and the end of the return
# expression are not visible in this view -- confirm which part of the
# response is actually handed back to the caller.
289 def _extract_post(self
, handle
, post_id
):
290 return self
._download
_json
(
291 'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
293 'uri': f
'at://{handle}/app.bsky.feed.post/{post_id}',
298 def _real_extract(self
, url
):
299 handle
, video_id
= self
._match
_valid
_url
(url
).group('handle', 'id')
300 post
= self
._extract
_post
(handle
, video_id
)
303 # app.bsky.embed.video.view/app.bsky.embed.external.view
304 entries
.extend(self
._extract
_videos
(post
, video_id
))
305 # app.bsky.embed.recordWithMedia.view
306 entries
.extend(self
._extract
_videos
(
307 post
, video_id
, embed_path
=('embed', 'media'), record_subpath
=('embed', 'media')))
308 # app.bsky.embed.record.view
309 if nested_post
:= traverse_obj(post
, ('embed', 'record', ('record', None), {dict}
, any
)):
310 entries
.extend(self
._extract
_videos
(
311 nested_post
, video_id
, embed_path
=('embeds', 0), record_path
='value'))
314 raise ExtractorError('No video could be found in this post', expected
=True)
315 if len(entries
) == 1:
317 return self
.playlist_result(entries
, video_id
)
def _build_profile_url(path):
    """Format ``path`` into a ``https://bsky.app/profile/...`` URL.

    NOTE(review): this takes no ``self`` yet is referenced as
    ``self._build_profile_url`` elsewhere in the class, so it is presumably
    wrapped in ``@staticmethod`` on a line outside this block -- confirm.

    :param path: profile identifier; the callers visible in this file pass
        an author handle or an author DID.
    :return: the result of ``format_field`` called with ``default=None``.
    """
    profile_url_template = 'https://bsky.app/profile/%s'
    return format_field(path, None, profile_url_template, default=None)
323 def _extract_videos(self
, root
, video_id
, embed_path
='embed', record_path
='record', record_subpath
='embed'):
324 embed_path
= variadic(embed_path
, (str, bytes
, dict, set))
325 record_path
= variadic(record_path
, (str, bytes
, dict, set))
326 record_subpath
= variadic(record_subpath
, (str, bytes
, dict, set))
329 if external_uri
:= traverse_obj(root
, (
330 ((*record_path
, *record_subpath
), embed_path
), 'external', 'uri', {url_or_none}
, any
)):
331 entries
.append(self
.url_result(external_uri
))
332 if playlist
:= traverse_obj(root
, (*embed_path
, 'playlist', {url_or_none}
)):
333 formats
, subtitles
= self
._extract
_m
3u8_formats
_and
_subtitles
(
334 playlist
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False)
338 video_cid
= traverse_obj(
339 root
, (*embed_path
, 'cid', {str}
),
340 (*record_path
, *record_subpath
, 'video', 'ref', '$link', {str}
))
341 did
= traverse_obj(root
, ('author', 'did', {str}
))
343 if did
and video_cid
:
344 endpoint
= self
._get
_service
_endpoint
(did
, video_id
)
349 'url': update_url_query(
350 self
._BLOB
_URL
_TMPL
.format(endpoint
), {'did': did
, 'cid': video_cid
}),
351 **traverse_obj(root
, (*embed_path
, 'aspectRatio', {
352 'width': ('width', {int_or_none}
),
353 'height': ('height', {int_or_none}
),
355 **traverse_obj(root
, (*record_path
, *record_subpath
, 'video', {
356 'filesize': ('size', {int_or_none}
),
357 'ext': ('mimeType', {mimetype2ext}
),
361 for sub_data
in traverse_obj(root
, (
362 *record_path
, *record_subpath
, 'captions', lambda _
, v
: v
['file']['ref']['$link'])):
363 subtitles
.setdefault(sub_data
.get('lang') or 'und', []).append({
364 'url': update_url_query(
365 self
._BLOB
_URL
_TMPL
.format(endpoint
), {'did': did
, 'cid': sub_data
['file']['ref']['$link']}),
366 'ext': traverse_obj(sub_data
, ('file', 'mimeType', {mimetype2ext}
)),
372 'subtitles': subtitles
,
373 **traverse_obj(root
, {
374 'id': ('uri', {url_basename}
),
375 'thumbnail': (*embed_path
, 'thumbnail', {url_or_none}
),
376 'alt_title': (*embed_path
, 'alt', {str}
, filter),
377 'uploader': ('author', 'displayName', {str}
),
378 'uploader_id': ('author', 'handle', {str}
),
379 'uploader_url': ('author', 'handle', {self
._build
_profile
_url
}),
380 'channel_id': ('author', 'did', {str}
),
381 'channel_url': ('author', 'did', {self
._build
_profile
_url
}),
382 'like_count': ('likeCount', {int_or_none}
),
383 'repost_count': ('repostCount', {int_or_none}
),
384 'comment_count': ('replyCount', {int_or_none}
),
385 'timestamp': ('indexedAt', {parse_iso8601}
),
386 'tags': ('labels', ..., 'val', {str}
, all
, {orderedSet}
),
388 'labels', ..., 'val', {lambda x
: 18 if x
in ('sexual', 'porn', 'graphic-media') else None}, any
),
389 'description': (*record_path
, 'text', {str}
, filter),
390 'title': (*record_path
, 'text', {lambda x
: x
.replace('\n', ' ')}, {truncate_string(left
=50)}),