3 from .common
import InfoExtractor
5 compat_etree_fromstring
,
7 compat_urllib_parse_unquote
,
8 compat_urllib_parse_urlparse
,
10 from ..networking
import HEADRequest
26 class OdnoklassnikiIE(InfoExtractor
):
29 (?:(?:www|m|mobile)\.)?
30 (?:odnoklassniki|ok)\.ru/
32 video(?P<embed>embed)?/|
33 web-api/video/moviePlayer/|
39 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:odnoklassniki|ok
)\
.ru
/videoembed
/.+?
)\
1']
41 'note
': 'Coub embedded
',
42 'url
': 'http
://ok
.ru
/video
/1484130554189',
46 'timestamp
': 1545580896,
48 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
49 'title
': 'Народная забава
',
51 'upload_date
': '20181223',
53 'uploader_id
': 'nevata
.s
',
59 'note
': 'vk
.com embedded
',
60 'url
': 'https
://ok
.ru
/video
/3568183087575',
62 'id': '-165101755_456243749',
64 'uploader_id
': '-165101755',
66 'timestamp
': 1642869935,
67 'upload_date
': '20220122',
72 'skip
': 'vk extractor error
',
74 # metadata in JSON, webm_dash with Firefox UA
75 'url
': 'http
://ok
.ru
/video
/20079905452',
76 'md5
': '8f477d8931c531374a3e36daec617b2c
',
80 'title
': 'Культура меняет нас
(прекрасный ролик
!))',
83 'upload_date
': '20141207',
84 'uploader_id
': '330537914540',
85 'uploader
': 'Виталий Добровольский
',
90 'format
': 'bv
[ext
=webm
]',
91 'http_headers
': {'User
-Agent
': 'Mozilla
/5.0 (Windows NT
10.0; rv
:102.0) Gecko
/20100101 Firefox
/102.0'},
95 'url
': 'http
://ok
.ru
/video
/63567059965189-0?fromTime
=5',
96 'md5
': '2bae2f58eefe1b3d26f3926c4a64d2f3
',
98 'id': '63567059965189-0',
100 'title
': 'Девушка без комплексов
...',
103 'upload_date
': '20150518',
104 'uploader_id
': '534380003155',
105 'uploader
': '☭ Андрей Мещанинов ☭
',
110 'params
': {'skip_download
': 'm3u8
'},
112 # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
113 'url
': 'https
://ok
.ru
/video
/3952212382174',
114 'md5
': '5fb5f83ce16cb212d6bf887282b5da53
',
118 'title
': 'Youtube
-dl
101: What
is it
and HOW to use it
! Full Download Walkthrough
and Guide
',
119 'description
': 'md5
:b57209eeb9d5c2f20c984dfb58862097
',
120 'uploader
': 'Lod Mer
',
121 'uploader_id
': '575186401502',
124 'upload_date
': '20210405',
125 'comment_count
': int,
126 'live_status
': 'not_live
',
128 'thumbnail
': 'https
://i
.mycdn
.me
/i?r
=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5
-HIEAt7Zi9s0CiBOSDmbngC
-I
-k
&fn
=external_8
',
129 'uploader_url
': 'https
://www
.youtube
.com
/@MrKewlkid94',
130 'channel_follower_count
': int,
131 'tags
': ['youtube
-dl
', 'youtube playlists
', 'download videos
', 'download audio
'],
132 'channel_id
': 'UCVGtvURtEURYHtJFUegdSug
',
134 'availability
': 'public
',
135 'channel_url
': 'https
://www
.youtube
.com
/channel
/UCVGtvURtEURYHtJFUegdSug
',
136 'categories
': ['Education
'],
137 'playable_in_embed
': True,
138 'channel
': 'BornToReact
',
141 # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
142 'url
': 'http
://ok
.ru
/video
/62036049272859-0',
144 'id': '62036049272859-0',
146 'title
': 'МУЗЫКА ДОЖДЯ
.',
147 'description
': 'md5
:6f1867132bd96e33bf53eda1091e8ed0
',
148 'upload_date
': '20120106',
149 'uploader_id
': '473534735899',
150 'uploader
': 'МARINA D
',
154 'skip_download
': True,
156 'skip
': 'Video has
not been found
',
158 'note
': 'Only available
in mobile webpage
',
159 'url
': 'https
://m
.ok
.ru
/video
/2361249957145',
161 'id': '2361249957145',
163 'title
': 'Быковское крещение
',
164 'duration
': 3038.181,
165 'thumbnail
': r're
:^https?
://i\
.mycdn\
.me
/videoPreview
\?.+',
169 'url
': 'https
://ok
.ru
/video
/4249587550747',
171 'id': '4249587550747',
173 'title
': 'Small Country An African
Childhood (2020) (1080p
) +subtitle
',
174 'uploader
': 'Sunflower Movies
',
175 'uploader_id
': '595802161179',
176 'upload_date
': '20220816',
179 'thumbnail
': r're
:^https?
://i\
.mycdn\
.me
/videoPreview
\?.+',
184 'skip_download
': True,
187 'url
': 'http
://ok
.ru
/web
-api
/video
/moviePlayer
/20079905452',
188 'only_matching
': True,
190 'url
': 'http
://www
.ok
.ru
/video
/20648036891',
191 'only_matching
': True,
193 'url
': 'http
://www
.ok
.ru
/videoembed
/20648036891',
194 'only_matching
': True,
196 'url
': 'http
://m
.ok
.ru
/video
/20079905452',
197 'only_matching
': True,
199 'url
': 'http
://mobile
.ok
.ru
/video
/20079905452',
200 'only_matching
': True,
202 'url
': 'https
://www
.ok
.ru
/live
/484531969818',
203 'only_matching
': True,
205 'url
': 'https
://m
.ok
.ru
/dk?st
.cmd
=movieLayer
&st
.discId
=863789452017&st
.retLoc
=friend
&st
.rtu
=%2Fdk
%3Fst
.cmd
%3DfriendMovies
%26st
.mode
%3Down
%26st
.mrkId
%3D
%257B
%2522uploadedMovieMarker
%2522%253A
%257B
%2522marker
%2522%253A
%25221519410114503%2522%252C
%2522hasMore
%2522%253Atrue
%257D
%252C
%2522sharedMovieMarker
%2522%253A
%257B
%2522marker
%2522%253Anull
%252C
%2522hasMore
%2522%253Afalse
%257D
%257D
%26st
.friendId
%3D561722190321
%26st
.frwd
%3Don
%26_prevCmd
%3DfriendMovies
%26tkn
%3D7257
&st
.discType
=MOVIE
&st
.mvId
=863789452017&_prevCmd
=friendMovies
&tkn
=3648#lst#',
206 'only_matching': True,
209 'url': 'https://ok.ru/video/954886983203',
210 'only_matching': True,
212 'url': 'https://ok.ru/videoembed/2932705602075',
214 'id': '2932705602075',
216 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
217 'title': 'Boosty для тебя!',
218 'uploader_id': '597811038747',
225 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
227 'id': '3950343629563',
229 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
230 'title': 'Заяц Бусти.mp4',
231 'uploader_id': '571368965883',
235 'skip': 'Site no longer embeds',
238 def _clear_cookies(self
, cdn_url
):
239 # Direct http downloads will fail if CDN cookies are set
240 # so we need to reset them after each format extraction
241 self
.cookiejar
.clear(domain
='.mycdn.me')
242 self
.cookiejar
.clear(domain
=urllib
.parse
.urlparse(cdn_url
).hostname
)
def _extract_embed_urls(cls, url, webpage):
    """Yield the parent implementation's embed URLs with the page URL attached.

    Each URL produced by the superclass `_extract_embed_urls(url, webpage)`
    is passed through `smuggle_url` together with `{'referrer': url}`, so
    the embedding page travels along with the embed URL.
    """
    parent_embed_urls = super()._extract_embed_urls(url, webpage)
    for embed_url in parent_embed_urls:
        # A new dict per URL: the helper receives its own payload each time.
        yield smuggle_url(embed_url, {'referrer': url})
249 def _real_extract(self
, url
):
251 return self
._extract
_desktop
(url
)
252 except ExtractorError
as e
:
254 return self
._extract
_mobile
(url
)
255 except ExtractorError
:
256 # error message of desktop webpage is in English
259 def _extract_desktop(self
, url
):
260 start_time
= int_or_none(compat_parse_qs(
261 compat_urllib_parse_urlparse(url
).query
).get('fromTime', [None])[0])
263 url
, smuggled
= unsmuggle_url(url
, {})
264 video_id
, is_embed
= self
._match
_valid
_url
(url
).group('id', 'embed')
265 mode
= 'videoembed' if is_embed
else 'video'
267 webpage
= self
._download
_webpage
(
268 f
'https://ok.ru/{mode}/{video_id}', video_id
,
269 note
='Downloading desktop webpage',
270 headers
={'Referer': smuggled
['referrer']} if smuggled
.get('referrer') else {})
272 error
= self
._search
_regex
(
273 r
'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
274 webpage
, 'error', default
=None)
275 # Direct link from boosty
276 if (error
== 'The author of this video has not been found or is blocked'
277 and not smuggled
.get('referrer') and mode
== 'videoembed'):
278 return self
._extract
_desktop
(smuggle_url(url
, {'referrer': 'https://boosty.to'}))
280 raise ExtractorError(error
, expected
=True)
282 player
= self
._parse
_json
(
283 unescapeHTML(self
._search
_regex
(
284 r
'data-options=(?P<quote>["\'])(?P
<player
>{.+?
%s.+?
})(?P
=quote
)' % video_id,
285 webpage, 'player
', group='player
')),
288 # embedded external player
289 if player.get('isExternalPlayer
') and player.get('url
'):
290 return self.url_result(player['url
'])
292 flashvars = player['flashvars
']
294 metadata = flashvars.get('metadata
')
296 metadata = self._parse_json(metadata, video_id)
299 st_location = flashvars.get('location
')
301 data['st
.location
'] = st_location
302 metadata = self._download_json(
303 compat_urllib_parse_unquote(flashvars['metadataUrl
']),
304 video_id, 'Downloading metadata JSON
',
305 data=urlencode_postdata(data))
307 movie = metadata['movie
']
309 # Some embedded videos may not contain title in movie dict (e.g.
310 # http://ok.ru/video/62036049272859-0) thus we allow missing title
311 # here and it's going to be extracted later by an extractor that
312 # will process the actual embed.
313 provider
= metadata
.get('provider')
314 title
= movie
['title'] if provider
== 'UPLOADED_ODKL' else movie
.get('title')
316 thumbnail
= movie
.get('poster')
317 duration
= int_or_none(movie
.get('duration'))
319 author
= metadata
.get('author', {})
320 uploader_id
= author
.get('id')
321 uploader
= author
.get('name')
323 upload_date
= unified_strdate(self
._html
_search
_meta
(
324 'ya:ovs:upload_date', webpage
, 'upload date', default
=None))
327 adult
= self
._html
_search
_meta
(
328 'ya:ovs:adult', webpage
, 'age limit', default
=None)
330 age_limit
= 18 if adult
== 'true' else 0
332 like_count
= int_or_none(metadata
.get('likeCount'))
335 for sub
in traverse_obj(metadata
, ('movie', 'subtitleTracks', ...), expected_type
=dict):
336 sub_url
= sub
.get('url')
339 subtitles
.setdefault(sub
.get('language') or 'en', []).append({
347 'thumbnail': thumbnail
,
348 'duration': duration
,
349 'upload_date': upload_date
,
350 'uploader': uploader
,
351 'uploader_id': uploader_id
,
352 'like_count': like_count
,
353 'age_limit': age_limit
,
354 'start_time': start_time
,
355 'subtitles': subtitles
,
359 if provider
== 'OPEN_GRAPH':
361 '_type': 'url_transparent',
362 'url': movie
['contentId'],
366 if provider
== 'USER_YOUTUBE':
368 '_type': 'url_transparent',
369 'url': movie
['contentId'],
374 if provider
== 'LIVE_TV_APP':
375 info
['title'] = title
377 quality
= qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
382 'format_id': f
.get('name'),
383 } for f
in traverse_obj(metadata
, ('videos', lambda _
, v
: url_or_none(v
['url'])))]
385 m3u8_url
= traverse_obj(metadata
, 'hlsManifestUrl', 'ondemandHls')
387 formats
.extend(self
._extract
_m
3u8_formats
(
388 m3u8_url
, video_id
, 'mp4', 'm3u8_native',
389 m3u8_id
='hls', fatal
=False))
390 self
._clear
_cookies
(m3u8_url
)
392 for mpd_id
, mpd_key
in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
393 mpd_url
= metadata
.get(mpd_key
)
395 formats
.extend(self
._extract
_mpd
_formats
(
396 mpd_url
, video_id
, mpd_id
=mpd_id
, fatal
=False))
397 self
._clear
_cookies
(mpd_url
)
399 dash_manifest
= metadata
.get('metadataEmbedded')
401 formats
.extend(self
._parse
_mpd
_formats
(
402 compat_etree_fromstring(dash_manifest
), 'mpd'))
405 fmt_type
= self
._search
_regex
(
406 r
'\btype[/=](\d)', fmt
['url'],
407 'format type', default
=None)
409 fmt
['quality'] = quality(fmt_type
)
412 m3u8_url
= metadata
.get('hlsMasterPlaylistUrl')
414 formats
.extend(self
._extract
_m
3u8_formats
(
415 m3u8_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False))
416 self
._clear
_cookies
(m3u8_url
)
417 rtmp_url
= metadata
.get('rtmpUrl')
426 payment_info
= metadata
.get('paymentInfo')
428 self
.raise_no_formats('This video is paid, subscribe to download it', expected
=True)
430 info
['formats'] = formats
433 def _extract_mobile(self
, url
):
434 video_id
= self
._match
_id
(url
)
436 webpage
= self
._download
_webpage
(
437 'http://m.ok.ru/video/%s' % video_id
, video_id
,
438 note
='Downloading mobile webpage')
440 error
= self
._search
_regex
(
441 r
'видео</a>\s*<div\s+class="empty">(.+?)</div>',
442 webpage
, 'error', default
=None)
444 raise ExtractorError(error
, expected
=True)
446 json_data
= self
._search
_regex
(
447 r
'data-video="(.+?)"', webpage
, 'json data')
448 json_data
= self
._parse
_json
(unescapeHTML(json_data
), video_id
) or {}
450 redirect_url
= self
._request
_webpage
(HEADRequest(
451 json_data
['videoSrc']), video_id
, 'Requesting download URL').url
452 self
._clear
_cookies
(redirect_url
)
456 'title': json_data
.get('videoName'),
457 'duration': float_or_none(json_data
.get('videoDuration'), scale
=1000),
458 'thumbnail': json_data
.get('videoPosterSrc'),
460 'format_id': 'mobile',