7 from .common
import InfoExtractor
, SearchInfoExtractor
# Root of Rokfin's public v2 API; every metadata request below is built on it.
_API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/'
26 class RokfinIE(InfoExtractor
):
27 _VALID_URL
= r
'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)'
28 _NETRC_MACHINE
= 'rokfin'
29 _AUTH_BASE
= 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect'
30 _access_mgmt_tokens
= {} # OAuth 2.0: RFC 6749, Sec. 1.4-5
32 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change',
36 'title': 'Mitt Romney\'s Crazy Solution To Climate Change',
37 'thumbnail': r
're:https://img\.production\.rokfin\.com/.+',
38 'upload_date': '20211023',
39 'timestamp': 1634998029,
40 'channel': 'Jimmy Dore',
41 'channel_id': '65429',
42 'channel_url': 'https://rokfin.com/TheJimmyDoreShow',
43 'availability': 'public',
44 'live_status': 'not_live',
50 'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time',
54 'title': 'Julian Assange Arrested: Streaming In Real Time',
55 'thumbnail': r
're:https://img\.production\.rokfin\.com/.+',
56 'upload_date': '20190412',
57 'timestamp': 1555052644,
58 'channel': 'Ron Placone',
60 'channel_url': 'https://rokfin.com/RonPlacone',
61 'availability': 'public',
62 'live_status': 'not_live',
65 'tags': ['FreeThinkingMedia^', 'RealProgressives^'],
68 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data',
72 'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial Data',
73 'thumbnail': r
're:https://img\.production\.rokfin\.com/.+',
74 'description': 'md5:324ce2d3e3b62e659506409e458b9d8e',
75 'channel': 'TLAVagabond',
76 'channel_id': '53856',
77 'channel_url': 'https://rokfin.com/TLAVagabond',
78 'availability': 'public',
81 'live_status': 'was_live',
82 'timestamp': 1635874720,
83 'release_timestamp': 1635874720,
84 'release_date': '20211102',
85 'upload_date': '20211102',
88 'tags': ['FreeThinkingMedia^'],
91 'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer',
95 'title': 'Brave New World - Aldous Huxley DEEPDIVE! (Chpts 1-3) - Quite Frankly & Jay Dyer',
96 'thumbnail': r
're:https://img\.production\.rokfin\.com/.+',
97 'channel': 'Jay Dyer',
98 'channel_id': '186881',
99 'channel_url': 'https://rokfin.com/jaydyer',
100 'availability': 'premium_only',
101 'live_status': 'not_live',
102 'dislike_count': int,
104 'timestamp': 1678213357,
105 'upload_date': '20230307',
106 'tags': ['FreeThinkingMedia^', 'OpenMind^'],
107 'description': 'md5:cb04e32e68326c9b2b251b297bacff35',
111 'url': 'https://rokfin.com/stream/31332/The-Grayzone-live-on-Nordstream-blame-game',
113 'id': 'stream/31332',
115 'title': 'The Grayzone live on Nordstream blame game',
116 'thumbnail': r
're:https://image\.v\.rokfin\.com/.+',
117 'channel': 'Max Blumenthal',
118 'channel_id': '248902',
119 'channel_url': 'https://rokfin.com/MaxBlumenthal',
120 'availability': 'premium_only',
121 'live_status': 'was_live',
122 'dislike_count': int,
124 'timestamp': 1678475166,
125 'release_timestamp': 1678475166.0,
126 'release_date': '20230310',
127 'upload_date': '20230310',
128 'tags': ['FreeThinkingMedia^'],
132 def _real_extract(self
, url
):
133 video_id
, video_type
= self
._match
_valid
_url
(url
).group('id', 'type')
134 metadata
= self
._download
_json
_using
_access
_token
(f
'{_API_BASE_URL}{video_id}', video_id
)
136 scheduled
= unified_timestamp(metadata
.get('scheduledAt'))
137 live_status
= ('was_live' if metadata
.get('stoppedAt')
138 else 'is_upcoming' if scheduled
139 else 'is_live' if video_type
== 'stream'
142 video_url
= traverse_obj(metadata
, 'url', ('content', 'contentUrl'), expected_type
=url_or_none
)
143 if video_url
in (None, 'fake.m3u8'):
144 video_url
= format_field(self
._search
_regex
(
145 r
'https?://[^/]+/([^/]+)/storyboard.vtt',
146 traverse_obj(metadata
, 'timelineUrl', ('content', 'timelineUrl'), expected_type
=url_or_none
),
147 video_id
, default
=None), None, 'https://stream.v.rokfin.com/%s.m3u8')
149 formats
, subtitles
= [{'url': video_url
}] if video_url
else [], {}
150 if determine_ext(video_url
) == 'm3u8':
151 formats
, subtitles
= self
._extract
_m
3u8_formats
_and
_subtitles
(
152 video_url
, video_id
, fatal
=False, live
=live_status
== 'is_live')
155 if traverse_obj(metadata
, 'premiumPlan', 'premium'):
156 self
.raise_login_required('This video is only available to premium users', True, method
='cookies')
158 self
.raise_no_formats(
159 f
'Stream is offline; scheduled for {dt.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
160 video_id
=video_id
, expected
=True)
162 uploader
= traverse_obj(metadata
, ('createdBy', 'username'), ('creator', 'username'))
163 timestamp
= (scheduled
or float_or_none(metadata
.get('postedAtMilli'), 1000)
164 or unified_timestamp(metadata
.get('creationDateTime')))
168 'subtitles': subtitles
,
169 'title': str_or_none(traverse_obj(metadata
, 'title', ('content', 'contentTitle'))),
170 'duration': float_or_none(traverse_obj(metadata
, ('content', 'duration'))),
171 'thumbnail': url_or_none(traverse_obj(metadata
, 'thumbnail', ('content', 'thumbnailUrl1'))),
172 'description': str_or_none(traverse_obj(metadata
, 'description', ('content', 'contentDescription'))),
173 'like_count': int_or_none(metadata
.get('likeCount')),
174 'dislike_count': int_or_none(metadata
.get('dislikeCount')),
175 'channel': str_or_none(traverse_obj(metadata
, ('createdBy', 'name'), ('creator', 'name'))),
176 'channel_id': str_or_none(traverse_obj(metadata
, ('createdBy', 'id'), ('creator', 'id'))),
177 'channel_url': url_or_none(f
'https://rokfin.com/{uploader}') if uploader
else None,
178 'timestamp': timestamp
,
179 'release_timestamp': timestamp
if live_status
!= 'not_live' else None,
180 'tags': traverse_obj(metadata
, ('tags', ..., 'title'), expected_type
=str_or_none
),
181 'live_status': live_status
,
182 'availability': self
._availability
(
183 needs_premium
=bool(traverse_obj(metadata
, 'premiumPlan', 'premium')),
184 is_private
=False, needs_subscription
=False, needs_auth
=False, is_unlisted
=False),
185 # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong
186 '__post_extractor': self
.extract_comments(video_id
) if video_type
== 'post' else None,
189 def _get_comments(self
, video_id
):
191 for page_n
in itertools
.count():
192 raw_comments
= self
._download
_json
(
193 f
'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
194 video_id
, note
=f
'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}',
197 for comment
in raw_comments
.get('content') or []:
199 'text': str_or_none(comment
.get('comment')),
200 'author': str_or_none(comment
.get('name')),
201 'id': comment
.get('commentId'),
202 'author_id': comment
.get('userId'),
204 'like_count': int_or_none(comment
.get('numLikes')),
205 'dislike_count': int_or_none(comment
.get('numDislikes')),
206 'timestamp': unified_timestamp(comment
.get('postedAt')),
209 pages_total
= int_or_none(raw_comments
.get('totalPages')) or None
210 is_last
= raw_comments
.get('last')
211 if not raw_comments
.get('content') or is_last
or (page_n
> pages_total
if pages_total
else is_last
is not False):
214 def _perform_login(self
, username
, password
):
215 # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 3.1)
216 login_page
= self
._download
_webpage
(
217 f
'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid',
218 None, note
='loading login page', errnote
='error loading login page')
219 authentication_point_url
= unescapeHTML(self
._search
_regex
(
220 r
'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"',
221 login_page
, name
='Authentication URL'))
223 resp_body
= self
._download
_webpage
(
224 authentication_point_url
, None, note
='logging in', fatal
=False, expected_status
=404,
225 data
=urlencode_postdata({'username': username
, 'password': password
, 'rememberMe': 'off', 'credentialId': ''}))
226 if not self
._authentication
_active
():
227 if re
.search(r
'(?i)(invalid\s+username\s+or\s+password)', resp_body
or ''):
228 raise ExtractorError('invalid username/password', expected
=True)
229 raise ExtractorError('Login failed')
231 urlh
= self
._request
_webpage
(
232 f
'{self._AUTH_BASE}/auth', None,
233 note
='granting user authorization', errnote
='user authorization rejected by Rokfin',
237 'redirect_uri': 'https://rokfin.com/silent-check-sso.html',
238 'response_mode': 'fragment',
239 'response_type': 'code',
242 self
._access
_mgmt
_tokens
= self
._download
_json
(
243 f
'{self._AUTH_BASE}/token', None,
244 note
='getting access credentials', errnote
='error getting access credentials',
245 data
=urlencode_postdata({
246 'code': urllib
.parse
.parse_qs(urllib
.parse
.urldefrag(urlh
.url
).fragment
).get('code')[0],
248 'grant_type': 'authorization_code',
249 'redirect_uri': 'https://rokfin.com/silent-check-sso.html',
def _authentication_active(self):
    """Return True when every Keycloak session cookie is present for the auth host.

    The cookies are looked up for ``self._AUTH_BASE``; the session is treated
    as active only if none of the four expected cookie names is missing.
    """
    required_cookies = {
        'KEYCLOAK_IDENTITY',
        'KEYCLOAK_IDENTITY_LEGACY',
        'KEYCLOAK_SESSION',
        'KEYCLOAK_SESSION_LEGACY',
    }
    # Iterating the cookie container yields cookie names, so a subset test
    # is the same as "nothing left after subtracting the present cookies".
    return required_cookies <= set(self._get_cookies(self._AUTH_BASE))
def _get_auth_token(self):
    """Return the ``authorization`` header value, or None when no token is stored.

    The value is ``"<token_type> <access_token>"`` taken from
    ``self._access_mgmt_tokens``.
    """
    tokens = self._access_mgmt_tokens
    try:
        return ' '.join([tokens['token_type'], tokens['access_token']])
    except (KeyError, TypeError):
        # Missing key, no token container at all, or non-string token parts.
        return None
260 def _download_json_using_access_token(self
, url_or_request
, video_id
, headers
={}, query
={}):
261 assert 'authorization' not in headers
262 headers
= headers
.copy()
263 auth_token
= self
._get
_auth
_token
()
264 refresh_token
= self
._access
_mgmt
_tokens
.get('refresh_token')
266 headers
['authorization'] = auth_token
268 json_string
, urlh
= self
._download
_webpage
_handle
(
269 url_or_request
, video_id
, headers
=headers
, query
=query
, expected_status
=401)
270 if not auth_token
or urlh
.status
!= 401 or refresh_token
is None:
271 return self
._parse
_json
(json_string
, video_id
)
273 self
._access
_mgmt
_tokens
= self
._download
_json
(
274 f
'{self._AUTH_BASE}/token', video_id
,
275 note
='User authorization expired or canceled by Rokfin. Re-authorizing ...', errnote
='Failed to re-authorize',
276 data
=urlencode_postdata({
277 'grant_type': 'refresh_token',
278 'refresh_token': refresh_token
,
281 headers
['authorization'] = self
._get
_auth
_token
()
282 if headers
['authorization'] is None:
283 raise ExtractorError('User authorization lost', expected
=True)
285 return self
._download
_json
(url_or_request
, video_id
, headers
=headers
, query
=query
)
288 class RokfinPlaylistBaseIE(InfoExtractor
):
293 'dead_stream': 'stream',
def _get_video_data(self, metadata):
    """Yield a ``url_result`` for every usable item in ``metadata['content']``.

    The item's ``mediaType`` is mapped through ``self._TYPES``. Posts are
    identified by ``id``, everything else by ``mediaId``. Items with an
    unmapped type or without an identifier are skipped.
    """
    for content in metadata.get('content') or []:
        media_type = self._TYPES.get(content.get('mediaType'))
        video_id = content.get('id') if media_type == 'post' else content.get('mediaId')
        if not media_type or not video_id:
            continue

        yield self.url_result(
            f'https://rokfin.com/{media_type}/{video_id}',
            video_id=f'{media_type}/{video_id}',
            video_title=str_or_none(traverse_obj(content, ('content', 'contentTitle'))))
308 class RokfinStackIE(RokfinPlaylistBaseIE
):
309 IE_NAME
= 'rokfin:stack'
310 IE_DESC
= 'Rokfin Stacks'
311 _VALID_URL
= r
'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)'
313 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020',
def _real_extract(self, url):
    """Return a playlist of the entries in the stack addressed by *url*."""
    list_id = self._match_id(url)
    stack_metadata = self._download_json(f'{_API_BASE_URL}stack/{list_id}', list_id)
    return self.playlist_result(self._get_video_data(stack_metadata), list_id)
326 class RokfinChannelIE(RokfinPlaylistBaseIE
):
327 IE_NAME
= 'rokfin:channel'
328 IE_DESC
= 'Rokfin Channels'
329 _VALID_URL
= r
'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$'
331 'url': 'https://rokfin.com/TheConvoCouch',
332 'playlist_mincount': 100,
335 'title': 'TheConvoCouch - New',
336 'description': 'md5:bb622b1bca100209b91cd685f7847f06',
def _real_initialize(self):
    """Validate the ``tab`` extractor argument before any extraction starts."""
    self._validate_extractor_args()
def _validate_extractor_args(self):
    """Reject an invalid ``tab`` extractor argument.

    When the argument is given, it must be exactly one value and that value
    must be a key of ``self._TABS``.

    Raises:
        ExtractorError: if zero or several values are given, or the value is
            not a known tab.
    """
    requested_tabs = self._configuration_arg('tab', None)
    if requested_tabs is None:
        return
    # `!= 1` also covers an empty selection, which would otherwise fail with
    # an IndexError on the `[0]` lookup instead of a readable message.
    if len(requested_tabs) != 1 or requested_tabs[0] not in self._TABS:
        raise ExtractorError(f'Invalid extractor-arg "tab". Must be one of {", ".join(self._TABS)}', expected=True)
def _entries(self, channel_id, channel_name, tab):
    """Yield playlist entries for one tab of a channel, fetching 50 items per page.

    The ``posts`` and ``top`` tabs are read from the per-user endpoint keyed
    by *channel_name*; any other tab goes through the post search endpoint
    filtered by *channel_id*. Paging starts at 0.
    """
    pages_total = None  # unknown until the first response has been read
    for page_n in itertools.count(0):
        if tab in ('posts', 'top'):
            data_url = f'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50'
        else:
            data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
        metadata = self._download_json(
            data_url, channel_name,
            note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}')

        yield from self._get_video_data(metadata)
        pages_total = int_or_none(metadata.get('totalPages')) or None
        is_last = metadata.get('last')
        # Stop on an explicit last-page flag. Without one, fall back to the
        # page count, and stop outright when neither signal is available.
        if is_last or (page_n > pages_total if pages_total else is_last is not False):
            return
def _real_extract(self, url):
    """Return a playlist for the selected tab of the channel addressed by *url*.

    The tab comes from the ``tab`` extractor argument and defaults to ``new``.
    The playlist id is ``<channel id>-<tab>`` and the title is
    ``<channel name> - <Tab>``.
    """
    channel_name = self._match_id(url)
    channel_info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name)
    channel_id = channel_info['id']
    tab = self._configuration_arg('tab', default=['new'])[0]

    return self.playlist_result(
        self._entries(channel_id, channel_name, self._TABS[tab]),
        f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description')))
385 class RokfinSearchIE(SearchInfoExtractor
):
386 IE_NAME
= 'rokfin:search'
387 IE_DESC
= 'Rokfin Search'
388 _SEARCH_KEY
= 'rkfnsearch'
390 'video': (('id', 'raw'), 'post'),
391 'audio': (('id', 'raw'), 'post'),
392 'stream': (('content_id', 'raw'), 'stream'),
393 'dead_stream': (('content_id', 'raw'), 'stream'),
394 'stack': (('content_id', 'raw'), 'stack'),
397 'url': 'rkfnsearch5:"zelenko"',
401 'title': '"zelenko"',
405 _db_access_key
= None
407 def _real_initialize(self
):
408 self
._db
_url
, self
._db
_access
_key
= self
.cache
.load(self
.ie_key(), 'auth', default
=(None, None))
410 self
._get
_db
_access
_credentials
()
def _search_results(self, query):
    """Yield a ``url_result`` for each search hit for *query*, 100 hits per page.

    Paging starts at 1 and ends at the first page that has no results. Hits
    whose content type is not in ``self._TYPES``, or that lack an integer id,
    are skipped.
    """
    total_pages = None  # unknown until the first response has been read
    for page_number in itertools.count(1):
        search_results = self._run_search_query(
            query, data={'query': query, 'page': {'size': 100, 'current': page_number}},
            note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}')
        total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none)

        for result in search_results.get('results') or []:
            video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None))
            video_id = traverse_obj(result, video_id_key, expected_type=int_or_none)
            if video_id and video_type:
                yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}')
        if not search_results.get('results'):
            return
def _run_search_query(self, video_id, data, **kwargs):
    """POST *data* as JSON to the search endpoint and return the parsed response.

    Up to two attempts are made. The first is non-fatal; if it yields
    nothing, the access credentials are refreshed and the request is
    repeated with ``fatal=True``. Extra keyword arguments are forwarded to
    ``_download_json``.
    """
    data = json.dumps(data).encode()
    for attempt in range(2):
        search_results = self._download_json(
            self._db_url, video_id, data=data, fatal=(attempt == 1),
            headers={'authorization': self._db_access_key}, **kwargs)
        if search_results:
            return search_results
        self.write_debug('Updating access credentials')
        self._get_db_access_credentials(video_id)
def _get_db_access_credentials(self, video_id=None):
    """Scrape the search endpoint and API key from the site's JavaScript bundles.

    The discover page is downloaded (a 404 status is accepted) and each
    ``/static/js/`` script it references is fetched in turn, non-fatally.
    Once both ``REACT_APP_SEARCH_KEY`` and ``REACT_APP_ENDPOINT_BASE`` have
    been seen, ``self._db_url`` and ``self._db_access_key`` are set and
    written to the cache.

    Raises:
        ExtractorError: if the scripts do not provide both values.
    """
    auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None}
    notfound_err_page = self._download_webpage(
        'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page')
    for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page):
        js_content = self._download_webpage(
            f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False)
        # Each match is a (name, value) pair, so the list updates the dict directly.
        auth_data.update(re.findall(
            rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or ''))
        if not all(auth_data.values()):
            continue

        self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json')
        self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}'
        self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
        return
    raise ExtractorError('Unable to extract access credentials')