4 from .common
import InfoExtractor
5 from ..networking
.exceptions
import HTTPError
14 get_elements_html_by_class
,
# Extractor class for rumble.com/embed/ player URLs.
# _VALID_URL accepts an optional "<prefix>." segment in front of the id and
# captures only the final [0-9a-z]+ run as the `id` group.
# _EMBED_REGEX interpolates _VALID_URL and looks for such a URL either in the
# src attribute of a <script>/<iframe> tag or as the value of an "embedUrl" key.
25 class RumbleEmbedIE(InfoExtractor
):
26 _VALID_URL
= r
'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
27 _EMBED_REGEX
= [fr
'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl
["\']\s*:\s*)["\'](?P
<url
>{_VALID_URL}
)']
29 'url
': 'https
://rumble
.com
/embed
/v5pv5f
',
30 'md5
': '36a18a049856720189f30977ccbb2c34
',
34 'title
': 'WMAR
2 News Latest Headlines | October
20, 6pm
',
35 'timestamp
': 1571611968,
36 'upload_date
': '20191020',
37 'channel_url
': 'https
://rumble
.com
/c
/WMAR
',
39 'thumbnail
': 'https
://sp
.rmbl
.ws
/s8
/1/5/M
/z
/1/5Mz1a
.qR4e
-small
-WMAR
-2-News
-Latest
-Headline
.jpg
',
42 'live_status
': 'not_live
',
45 'url
': 'https
://rumble
.com
/embed
/vslb7v
',
46 'md5
': '7418035de1a30a178b8af34dc2b6a52b
',
50 'title
': 'Defense Sec
. says US Commitment to NATO Defense
\'Ironclad
\'',
51 'timestamp
': 1645142135,
52 'upload_date
': '20220217',
53 'channel_url
': 'https
://rumble
.com
/c
/CyberTechNews
',
55 'thumbnail
': 'https
://sp
.rmbl
.ws
/s8
/6/7/i
/9/h
/7i9hd
.OvCc
.jpg
',
58 'live_status
': 'not_live
',
61 'url
': 'https
://rumble
.com
/embed
/vunh1h
',
65 'title
': '‘Gideon
, op zoek naar de waarheid’ including ENG SUBS
',
66 'timestamp
': 1647197663,
67 'upload_date
': '20220313',
68 'channel_url
': 'https
://rumble
.com
/user
/BLCKBX
',
70 'thumbnail
': r're
:https
://.+\
.jpg
',
73 'live_status
': 'not_live
',
77 'url
': r're
:https
://.+\
.vtt
',
84 'params
': {'skip_download
': True},
86 'url
': 'https
://rumble
.com
/embed
/v1essrt
',
90 'title
': 'startswith
:lofi hip hop radio 📚
- beats to relax
/study to
',
91 'timestamp
': 1661519399,
92 'upload_date
': '20220826',
93 'channel_url
': 'https
://rumble
.com
/c
/LofiGirl
',
94 'channel
': 'Lofi Girl
',
95 'thumbnail
': r're
:https
://.+\
.jpg
',
96 'uploader
': 'Lofi Girl
',
97 'live_status
': 'is_live
',
99 'params
': {'skip_download
': True},
101 'url
': 'https
://rumble
.com
/embed
/v1amumr
',
106 'title
': 'Turning Point USA
2022 Student Action Summit DAY
1 - Rumble Exclusive Live
',
107 'timestamp
': 1658518457,
108 'upload_date
': '20220722',
109 'channel_url
': 'https
://rumble
.com
/c
/RumbleEvents
',
110 'channel
': 'Rumble Events
',
111 'thumbnail
': r're
:https
://.+\
.jpg
',
113 'uploader
': 'Rumble Events
',
114 'live_status
': 'was_live
',
116 'params
': {'skip_download
': True},
118 'url
': 'https
://rumble
.com
/embed
/ufe9n
.v5pv5f
',
119 'only_matching
': True,
124 'note
': 'Rumble JS embed
',
125 'url
': 'https
://therightscoop
.com
/what
-does
-9-plus
-1-plus
-1-equal
-listen
-to
-this
-audio
-of
-attempted
-kavanaugh
-assassins
-call
-and-youll
-get
-it
',
126 'md5
': '4701209ac99095592e73dbba21889690
',
130 'channel
': 'Mr Producer Media
',
132 'title
': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh
',
133 'channel_url
': 'https
://rumble
.com
/c
/RichSementa
',
134 'thumbnail
': 'https
://sp
.rmbl
.ws
/s8
/1/P
/j
/f
/A
/PjfAe
.qR4e
-small
-911-Audio
-From
-The
-Man
-Who
-.jpg
',
135 'timestamp
': 1654892716,
136 'uploader
': 'Mr Producer Media
',
137 'upload_date
': '20220610',
138 'live_status
': 'not_live
',
# Collect Rumble embed URLs from a webpage.
# The parent implementation (super()._extract_embed_urls) is consulted first and
# its result is materialised as a tuple in `embeds`.
# The return statement visible here builds https://rumble.com/embed/<id> URLs
# from inline <script> bodies that call Rumble("play", {... video: "<id>" ...}).
# NOTE(review): original lines 143 and 146-147 are not part of this excerpt (the
# embedded numbering skips); presumably they hold the decorator and the check
# that returns `embeds` when it is non-empty - confirm against the full file.
144 def _extract_embed_urls(cls, url, webpage):
145 embeds = tuple(super()._extract_embed_urls(url, webpage))
148 return [f'https
://rumble
.com
/embed
/{mobj
.group("id")}' for mobj in re.finditer(
149 r'<script
>[^
<]*\bRumble\
(\s
*"play"\s
*,\s
*{[^
}]*[\'"]?video[\'"]?\s
*:\s
*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
# Extract a single Rumble embed: query the embedJS endpoint for the video id,
# derive a live status, walk the per-format data under 'ua', and assemble
# subtitles, thumbnails and the remaining info fields.
# NOTE(review): several original lines are missing from this excerpt (the
# embedded numbering skips), so some statements below appear without their
# enclosing condition or container - confirm against the full file.
151 def _real_extract(self, url):
152 video_id = self._match_id(url)
# Metadata request: GET https://rumble.com/embedJS/u3/ with request=video, ver=2, v=<id>.
153 video = self._download_json(
154 'https
://rumble
.com
/embedJS
/u3
/', video_id,
155 query={'request
': 'video
', 'ver
': 2, 'v
': video_id})
# A message found at video['sys']['msg'] is surfaced through report_warning.
157 sys_msg = traverse_obj(video, ('sys
', 'msg
'))
159 self.report_warning(sys_msg, video_id=video_id)
# Translate the numeric 'live' field into a live_status string:
#   0 -> 'not_live' when 'livestream_has_dvr' is None, otherwise 'was_live'
#   1 -> 'is_upcoming' when 'livestream_has_dvr' is truthy, otherwise 'was_live'
#   2 -> 'is_live'
161 if video.get('live
') == 0:
162 live_status = 'not_live
' if video.get('livestream_has_dvr
') is None else 'was_live
'
163 elif video.get('live
') == 1:
164 live_status = 'is_upcoming
' if video.get('livestream_has_dvr
') else 'was_live
'
165 elif video.get('live
') == 2:
166 live_status = 'is_live
'
# Each 'ua' entry is keyed by a format name (`ext`). When the value is a dict,
# its keys are used as the height for any entry lacking a usable meta['h'],
# and the dict is then reduced to its values for the loop below.
171 for ext, ext_info in (video.get('ua
') or {}).items():
172 if isinstance(ext_info, dict):
173 for height, video_info in ext_info.items():
174 if not traverse_obj(video_info, ('meta
', 'h
', {int_or_none})):
175 video_info.setdefault('meta
', {})['h
'] = height
176 ext_info = ext_info.values()
178 for video_info in ext_info:
179 meta = video_info.get('meta
') or {}
180 if not video_info.get('url
'):
# A format whose meta is flagged live while the video-level 'live' value is 1
# switches the status to 'post_live'.
183 if meta.get('live
') is True and video.get('live
') == 1:
184 live_status = 'post_live
'
185 formats.extend(self._extract_m3u8_formats(
186 video_info['url
'], video_id,
187 ext='mp4
', m3u8_id='hls
', fatal=False, live=live_status == 'is_live
'))
# 'timeline' entries get acodec 'none', a 'Timeline' format note and no fps;
# their extension is re-derived from the URL.
189 timeline = ext == 'timeline
'
191 ext = determine_ext(video_info['url
'])
194 'acodec
': 'none
' if timeline else None,
195 'url
': video_info['url
'],
196 'format_id
': join_nonempty(ext, format_field(meta, 'h
', '%sp
')),
197 'format_note
': 'Timeline
' if timeline else None,
198 'fps
': None if timeline else video.get('fps
'),
199 **traverse_obj(meta, {
204 }, expected_type=lambda x: int(x) or None),
209 'url
': sub_info['path
'],
210 'name
': sub_info.get('language
') or '',
211 }] for lang, sub_info in (video.get('cc
') or {}).items() if sub_info.get('path
')
# Thumbnails come from the 't' list (i/w/h mapped to url/width/height); if that
# yields nothing, the single image under 'i' is used instead.
214 author = video.get('author
') or {}
215 thumbnails = traverse_obj(video, ('t
', ..., {'url
': 'i
', 'width
': 'w
', 'height
': 'h
'}))
216 if not thumbnails and video.get('i
'):
217 thumbnails = [{'url
': video['i
']}]
219 if live_status in {'is_live
', 'post_live
'}:
222 duration = int_or_none(video.get('duration
'))
226 'title
': unescapeHTML(video.get('title
')),
228 'subtitles
': subtitles,
229 'thumbnails
': thumbnails,
230 'timestamp
': parse_iso8601(video.get('pubDate
')),
231 'channel
': author.get('name
'),
232 'channel_url
': author.get('url
'),
233 'duration
': duration,
234 'uploader
': author.get('name
'),
235 'live_status
': live_status,
# Extractor class for regular rumble.com video pages.
# _VALID_URL captures as `id` a single path segment that starts with "v" but is
# not "videos..." (negative lookahead `(?!ideos)`), followed by any non-slash
# characters up to the end of the URL.
# The two patterns after it match <a> tags whose href points at a /v....html
# page. NOTE(review): the line assigning them to a name is not part of this
# excerpt (the embedded numbering skips 241) - confirm against the full file.
239 class RumbleIE(InfoExtractor):
240 _VALID_URL = r'https?
://(?
:www\
.)?rumble\
.com
/(?P
<id>v(?
!ideos
)[\w
.-]+)[^
/]*$
'
242 r'<a
class=video
-item
--a href
=(?P
<url
>/v
[\w
.-]+\
.html
)>',
243 r'<a
[^
>]+class="videostream__link link"[^
>]+href
=(?P
<url
>/v
[\w
.-]+\
.html
)[^
>]*>']
245 'add_ie
': ['RumbleEmbed
'],
246 'url
': 'https
://rumble
.com
/vdmum1
-moose
-the
-dog
-helps
-girls
-dig
-a
-snow
-fort
.html
',
247 'md5
': '53af34098a7f92c4e51cf0bd1c33f009
',
251 'timestamp
': 1612662578,
252 'uploader
': 'LovingMontana
',
253 'channel
': 'LovingMontana
',
254 'upload_date
': '20210207',
255 'title
': 'Winter
-loving dog helps girls dig a snow fort
',
256 'description
': 'Moose the dog
is more than happy to
help with digging out this epic snow fort
. Great job
, Moose
!',
257 'channel_url
': 'https
://rumble
.com
/c
/c
-546523',
258 'thumbnail
': r're
:https
://.+\
.jpg
',
261 'dislike_count
': int,
263 'live_status
': 'not_live
',
266 'url
': 'http
://www
.rumble
.com
/vDMUM1?key
=value
',
267 'only_matching
': True,
269 'note
': 'timeline format
',
270 'url
': 'https
://rumble
.com
/v2ea9qb
-the
-u
.s
.-cannot
-hide
-this
-in-ukraine
-anymore
-redacted
-with
-natali
-and-clayt
.html
',
271 'md5
': '40d61fec6c0945bca3d0e1dc1aa53d79
',
272 'params
': {'format
': 'wv
'},
276 'uploader
': 'Redacted News
',
277 'upload_date
': '20230322',
278 'timestamp
': 1679445010,
279 'title
': 'The U
.S
. CANNOT hide this
in Ukraine anymore | Redacted with Natali
and Clayton Morris
',
281 'channel
': 'Redacted News
',
282 'description
': 'md5
:aaad0c5c3426d7a361c29bdaaced7c42
',
283 'channel_url
': 'https
://rumble
.com
/c
/Redacted
',
284 'live_status
': 'not_live
',
285 'thumbnail
': 'https
://sp
.rmbl
.ws
/s8
/1/d
/x
/2/O
/dx2Oi
.qR4e
-small
-The
-U
.S
.-CANNOT
-hide
-this
-i
.jpg
',
287 'dislike_count
': int,
291 'url
': 'https
://rumble
.com
/v2e7fju
-the
-covid
-twitter
-files
-drop
-protecting
-fauci
-while-censoring
-the
-truth
-wma
.html
',
295 'live_status
': 'was_live
',
296 'release_timestamp
': 1679446804,
297 'description
': 'md5
:2ac4908ccfecfb921f8ffa4b30c1e636
',
298 'release_date
': '20230322',
299 'timestamp
': 1679445692,
301 'upload_date
': '20230322',
302 'title
': 'The Covid Twitter Files Drop
: Protecting Fauci While Censoring The Truth w
/Matt Taibbi
',
303 'uploader
': 'Kim Iversen
',
304 'channel_url
': 'https
://rumble
.com
/c
/KimIversen
',
305 'channel
': 'Kim Iversen
',
306 'thumbnail
': 'https
://sp
.rmbl
.ws
/s8
/1/6/b
/w
/O
/6bwOi
.qR4e
-small
-The
-Covid
-Twitter
-Files
-Dro
.jpg
',
308 'dislike_count
': int,
314 'url
': 'https
://rumble
.com
/videos?page
=2',
315 'playlist_mincount
': 24,
317 'id': 'videos?page
=2',
318 'title
': 'All videos
',
319 'description
': 'Browse videos uploaded to Rumble
.com
',
323 'url
': 'https
://rumble
.com
/browse
/live
',
324 'playlist_mincount
': 25,
331 'url
': 'https
://rumble
.com
/search
/video?q
=rumble
&sort
=views
',
332 'playlist_mincount
': 24,
334 'id': 'video?q
=rumble
&sort
=views
',
335 'title
': 'Search results
for: rumble
',
# Extract a regular Rumble video page by delegating to the embed extractor.
# The page is downloaded and the first result of
# RumbleEmbedIE.extract_from_webpage is taken (None when there is none).
# The visible fields describe a 'url_transparent' result pointing at that embed,
# enriched with values scraped from the page HTML: release_timestamp,
# view_count, like_count, dislike_count and description.
# NOTE(review): the condition guarding `raise UnsupportedError(url)` and the
# line opening the returned dict are not part of this excerpt (the embedded
# numbering skips 344 and 346-347) - confirm against the full file.
340 def _real_extract(self, url):
341 page_id = self._match_id(url)
342 webpage = self._download_webpage(url, page_id)
343 url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None)
345 raise UnsupportedError(url)
348 '_type
': 'url_transparent
',
349 'ie_key
': url_info['ie_key
'],
350 'url
': url_info['url
'],
351 'release_timestamp
': parse_iso8601(self._search_regex(
352 r'(?
:Livestream begins|Streamed on
):\s
+<time datetime
="([^"]+)', webpage, 'release date
', default=None)),
353 'view_count
': int_or_none(self._search_regex(
354 r'"userInteractionCount"\s
*:\s
*(\d
+)', webpage, 'view count
', default=None)),
355 'like_count
': parse_count(self._search_regex(
356 r'<span data
-js
="rumbles_up_votes">\s
*([\d
,.KM
]+)', webpage, 'like count
', default=None)),
357 'dislike_count
': parse_count(self._search_regex(
358 r'<span data
-js
="rumbles_down_votes">\s
*([\d
,.KM
]+)', webpage, 'dislike count
', default=None)),
359 'description
': clean_html(get_element_by_class('media
-description
', webpage)),
# Extractor class for rumble.com channel (/c/<name>) and user (/user/<name>) pages.
# _VALID_URL exposes two groups: `url`, the page URL up to and including the
# name, and `id`, the name itself (everything up to the next & ? # $ or /).
# The key/value lines after the pattern are test-case fragments; the lines that
# open and close their containers are not part of this excerpt.
363 class RumbleChannelIE(InfoExtractor):
364 _VALID_URL = r'(?P
<url
>https?
://(?
:www\
.)?rumble\
.com
/(?
:c|user
)/(?P
<id>[^
&?
#$/]+))'
367 'url': 'https://rumble.com/c/Styxhexenhammer666',
368 'playlist_mincount': 1160,
370 'id': 'Styxhexenhammer666',
373 'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
374 'playlist_mincount': 4,
376 'id': 'goldenpoodleharleyeuna',
# Generator over the videos listed on a channel/user page.
# Pages are requested as '<url>?page=<n>' for n = 1, 2, 3, ... (itertools.count).
# For every downloaded page, the href attribute of each element carrying the
# 'videostream__link' class is joined onto https://rumble.com and yielded as a
# url_result.
# An ExtractorError whose cause is an HTTPError with status 404 is singled out.
# NOTE(review): the `try:` line and the statements executed inside and after
# that 404 check are not part of this excerpt (the embedded numbering skips
# 382, 386-387); presumably a 404 ends pagination and anything else is
# re-raised - confirm against the full file.
380 def entries(self
, url
, playlist_id
):
381 for page
in itertools
.count(1):
383 webpage
= self
._download
_webpage
(f
'{url}?page={page}', playlist_id
, note
=f
'Downloading page {page}')
384 except ExtractorError
as e
:
385 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 404:
388 for video_url
in traverse_obj(
389 get_elements_html_by_class('videostream__link', webpage
), (..., {extract_attributes}
, 'href'),
391 yield self
.url_result(urljoin('https://rumble.com', video_url
))
def _real_extract(self, url):
    """Return a playlist result for a channel or user page.

    The incoming URL is matched against the class URL pattern; the match's
    groups supply, in order, the page URL to paginate over and the playlist
    identifier. The playlist entries come from ``self.entries`` called with
    those two values.
    """
    matched = self._match_valid_url(url)
    page_url, channel_id = matched.groups()
    videos = self.entries(page_url, channel_id)
    return self.playlist_result(videos, playlist_id=channel_id)