3 from .common
import InfoExtractor
4 from ..compat
import compat_etree_fromstring
5 from ..networking
import HEADRequest
21 class OdnoklassnikiIE(InfoExtractor
):
24 (?:(?:www|m|mobile)\.)?
25 (?:odnoklassniki|ok)\.ru/
27 video(?P<embed>embed)?/|
28 web-api/video/moviePlayer/|
34 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:odnoklassniki|ok
)\
.ru
/videoembed
/.+?
)\
1']
36 'note
': 'Coub embedded
',
37 'url
': 'http
://ok
.ru
/video
/1484130554189',
41 'timestamp
': 1545580896,
43 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
44 'title
': 'Народная забава
',
46 'upload_date
': '20181223',
48 'uploader_id
': 'nevata
.s
',
54 'note
': 'vk
.com embedded
',
55 'url
': 'https
://ok
.ru
/video
/3568183087575',
57 'id': '-165101755_456243749',
59 'uploader_id
': '-165101755',
61 'timestamp
': 1642869935,
62 'upload_date
': '20220122',
67 'skip
': 'vk extractor error
',
69 # metadata in JSON, webm_dash with Firefox UA
70 'url
': 'http
://ok
.ru
/video
/20079905452',
71 'md5
': '8f477d8931c531374a3e36daec617b2c
',
75 'title
': 'Культура меняет нас
(прекрасный ролик
!))',
78 'upload_date
': '20141207',
79 'uploader_id
': '330537914540',
80 'uploader
': 'Виталий Добровольский
',
85 'format
': 'bv
[ext
=webm
]',
86 'http_headers
': {'User
-Agent
': 'Mozilla
/5.0 (Windows NT
10.0; rv
:102.0) Gecko
/20100101 Firefox
/102.0'},
90 'url
': 'http
://ok
.ru
/video
/63567059965189-0?fromTime
=5',
91 'md5
': '2bae2f58eefe1b3d26f3926c4a64d2f3
',
93 'id': '63567059965189-0',
95 'title
': 'Девушка без комплексов
...',
98 'upload_date
': '20150518',
99 'uploader_id
': '534380003155',
100 'uploader
': '☭ Андрей Мещанинов ☭
',
105 'params
': {'skip_download
': 'm3u8
'},
107 # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
108 'url
': 'https
://ok
.ru
/video
/3952212382174',
109 'md5
': '5fb5f83ce16cb212d6bf887282b5da53
',
113 'title
': 'Youtube
-dl
101: What
is it
and HOW to use it
! Full Download Walkthrough
and Guide
',
114 'description
': 'md5
:b57209eeb9d5c2f20c984dfb58862097
',
115 'uploader
': 'Lod Mer
',
116 'uploader_id
': '575186401502',
119 'upload_date
': '20210405',
120 'comment_count
': int,
121 'live_status
': 'not_live
',
123 'thumbnail
': 'https
://i
.mycdn
.me
/i?r
=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5
-HIEAt7Zi9s0CiBOSDmbngC
-I
-k
&fn
=external_8
',
124 'uploader_url
': 'https
://www
.youtube
.com
/@MrKewlkid94',
125 'channel_follower_count
': int,
126 'tags
': ['youtube
-dl
', 'youtube playlists
', 'download videos
', 'download audio
'],
127 'channel_id
': 'UCVGtvURtEURYHtJFUegdSug
',
129 'availability
': 'public
',
130 'channel_url
': 'https
://www
.youtube
.com
/channel
/UCVGtvURtEURYHtJFUegdSug
',
131 'categories
': ['Education
'],
132 'playable_in_embed
': True,
133 'channel
': 'BornToReact
',
136 # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
137 'url
': 'http
://ok
.ru
/video
/62036049272859-0',
139 'id': '62036049272859-0',
141 'title
': 'МУЗЫКА ДОЖДЯ
.',
142 'description
': 'md5
:6f1867132bd96e33bf53eda1091e8ed0
',
143 'upload_date
': '20120106',
144 'uploader_id
': '473534735899',
145 'uploader
': 'МARINA D
',
149 'skip_download
': True,
151 'skip
': 'Video has
not been found
',
153 'note
': 'Only available
in mobile webpage
',
154 'url
': 'https
://m
.ok
.ru
/video
/2361249957145',
156 'id': '2361249957145',
158 'title
': 'Быковское крещение
',
159 'duration
': 3038.181,
160 'thumbnail
': r're
:^https?
://i\
.mycdn\
.me
/videoPreview
\?.+',
164 'url
': 'https
://ok
.ru
/video
/4249587550747',
166 'id': '4249587550747',
168 'title
': 'Small Country An African
Childhood (2020) (1080p
) +subtitle
',
169 'uploader
': 'Sunflower Movies
',
170 'uploader_id
': '595802161179',
171 'upload_date
': '20220816',
174 'thumbnail
': r're
:^https?
://i\
.mycdn\
.me
/videoPreview
\?.+',
179 'skip_download
': True,
182 'url
': 'http
://ok
.ru
/web
-api
/video
/moviePlayer
/20079905452',
183 'only_matching
': True,
185 'url
': 'http
://www
.ok
.ru
/video
/20648036891',
186 'only_matching
': True,
188 'url
': 'http
://www
.ok
.ru
/videoembed
/20648036891',
189 'only_matching
': True,
191 'url
': 'http
://m
.ok
.ru
/video
/20079905452',
192 'only_matching
': True,
194 'url
': 'http
://mobile
.ok
.ru
/video
/20079905452',
195 'only_matching
': True,
197 'url
': 'https
://www
.ok
.ru
/live
/484531969818',
198 'only_matching
': True,
200 'url
': 'https
://m
.ok
.ru
/dk?st
.cmd
=movieLayer
&st
.discId
=863789452017&st
.retLoc
=friend
&st
.rtu
=%2Fdk
%3Fst
.cmd
%3DfriendMovies
%26st
.mode
%3Down
%26st
.mrkId
%3D
%257B
%2522uploadedMovieMarker
%2522%253A
%257B
%2522marker
%2522%253A
%25221519410114503%2522%252C
%2522hasMore
%2522%253Atrue
%257D
%252C
%2522sharedMovieMarker
%2522%253A
%257B
%2522marker
%2522%253Anull
%252C
%2522hasMore
%2522%253Afalse
%257D
%257D
%26st
.friendId
%3D561722190321
%26st
.frwd
%3Don
%26_prevCmd
%3DfriendMovies
%26tkn
%3D7257
&st
.discType
=MOVIE
&st
.mvId
=863789452017&_prevCmd
=friendMovies
&tkn
=3648#lst#',
201 'only_matching': True,
204 'url': 'https://ok.ru/video/954886983203',
205 'only_matching': True,
207 'url': 'https://ok.ru/videoembed/2932705602075',
209 'id': '2932705602075',
211 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
212 'title': 'Boosty для тебя!',
213 'uploader_id': '597811038747',
220 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
222 'id': '3950343629563',
224 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
225 'title': 'Заяц Бусти.mp4',
226 'uploader_id': '571368965883',
230 'skip': 'Site no longer embeds',
233 def _clear_cookies(self
, cdn_url
):
234 # Direct http downloads will fail if CDN cookies are set
235 # so we need to reset them after each format extraction
236 self
.cookiejar
.clear(domain
='.mycdn.me')
237 self
.cookiejar
.clear(domain
=urllib
.parse
.urlparse(cdn_url
).hostname
)
# NOTE(review): decorator restored because the method takes ``cls`` and
# delegates to the parent hook via ``super()`` -- confirm against the base class.
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield embed URLs found in *webpage*, tagged with the embedding page.

    Each URL produced by the parent implementation is wrapped with
    ``smuggle_url`` so that it carries ``{'referrer': url}``;
    ``_extract_desktop`` later sends that value as the ``Referer`` header.

    Args:
        url: URL of the page that embeds the player.
        webpage: Contents of that page, passed through to the parent hook.

    Yields:
        The smuggled embed URLs, in the order the parent yields them.
    """
    for embed_url in super()._extract_embed_urls(url, webpage):
        yield smuggle_url(embed_url, {'referrer': url})
244 def _real_extract(self
, url
):
246 return self
._extract
_desktop
(url
)
247 except ExtractorError
as e
:
249 return self
._extract
_mobile
(url
)
250 except ExtractorError
:
251 # error message of desktop webpage is in English
254 def _extract_desktop(self
, url
):
255 start_time
= int_or_none(urllib
.parse
.parse_qs(
256 urllib
.parse
.urlparse(url
).query
).get('fromTime', [None])[0])
258 url
, smuggled
= unsmuggle_url(url
, {})
259 video_id
, is_embed
= self
._match
_valid
_url
(url
).group('id', 'embed')
260 mode
= 'videoembed' if is_embed
else 'video'
262 webpage
= self
._download
_webpage
(
263 f
'https://ok.ru/{mode}/{video_id}', video_id
,
264 note
='Downloading desktop webpage',
265 headers
={'Referer': smuggled
['referrer']} if smuggled
.get('referrer') else {})
267 error
= self
._search
_regex
(
268 r
'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
269 webpage
, 'error', default
=None)
270 # Direct link from boosty
271 if (error
== 'The author of this video has not been found or is blocked'
272 and not smuggled
.get('referrer') and mode
== 'videoembed'):
273 return self
._extract
_desktop
(smuggle_url(url
, {'referrer': 'https://boosty.to'}))
275 raise ExtractorError(error
, expected
=True)
277 player
= self
._parse
_json
(
278 unescapeHTML(self
._search
_regex
(
279 rf
'data-options=(?P<quote>["\'])(?P<player>{{.+?{video_id}.+?}})(?P=quote)',
280 webpage
, 'player', group
='player')),
283 # embedded external player
284 if player
.get('isExternalPlayer') and player
.get('url'):
285 return self
.url_result(player
['url'])
287 flashvars
= player
['flashvars']
289 metadata
= flashvars
.get('metadata')
291 metadata
= self
._parse
_json
(metadata
, video_id
)
294 st_location
= flashvars
.get('location')
296 data
['st.location'] = st_location
297 metadata
= self
._download
_json
(
298 urllib
.parse
.unquote(flashvars
['metadataUrl']),
299 video_id
, 'Downloading metadata JSON',
300 data
=urlencode_postdata(data
))
302 movie
= metadata
['movie']
304 # Some embedded videos may not contain title in movie dict (e.g.
305 # http://ok.ru/video/62036049272859-0) thus we allow missing title
306 # here and it's going to be extracted later by an extractor that
307 # will process the actual embed.
308 provider
= metadata
.get('provider')
309 title
= movie
['title'] if provider
== 'UPLOADED_ODKL' else movie
.get('title')
311 thumbnail
= movie
.get('poster')
312 duration
= int_or_none(movie
.get('duration'))
314 author
= metadata
.get('author', {})
315 uploader_id
= author
.get('id')
316 uploader
= author
.get('name')
318 upload_date
= unified_strdate(self
._html
_search
_meta
(
319 'ya:ovs:upload_date', webpage
, 'upload date', default
=None))
322 adult
= self
._html
_search
_meta
(
323 'ya:ovs:adult', webpage
, 'age limit', default
=None)
325 age_limit
= 18 if adult
== 'true' else 0
327 like_count
= int_or_none(metadata
.get('likeCount'))
330 for sub
in traverse_obj(metadata
, ('movie', 'subtitleTracks', ...), expected_type
=dict):
331 sub_url
= sub
.get('url')
334 subtitles
.setdefault(sub
.get('language') or 'en', []).append({
342 'thumbnail': thumbnail
,
343 'duration': duration
,
344 'upload_date': upload_date
,
345 'uploader': uploader
,
346 'uploader_id': uploader_id
,
347 'like_count': like_count
,
348 'age_limit': age_limit
,
349 'start_time': start_time
,
350 'subtitles': subtitles
,
354 if provider
== 'OPEN_GRAPH':
356 '_type': 'url_transparent',
357 'url': movie
['contentId'],
361 if provider
== 'USER_YOUTUBE':
363 '_type': 'url_transparent',
364 'url': movie
['contentId'],
369 if provider
== 'LIVE_TV_APP':
370 info
['title'] = title
372 quality
= qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
377 'format_id': f
.get('name'),
378 } for f
in traverse_obj(metadata
, ('videos', lambda _
, v
: url_or_none(v
['url'])))]
380 m3u8_url
= traverse_obj(metadata
, 'hlsManifestUrl', 'ondemandHls')
382 formats
.extend(self
._extract
_m
3u8_formats
(
383 m3u8_url
, video_id
, 'mp4', 'm3u8_native',
384 m3u8_id
='hls', fatal
=False))
385 self
._clear
_cookies
(m3u8_url
)
387 for mpd_id
, mpd_key
in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
388 mpd_url
= metadata
.get(mpd_key
)
390 formats
.extend(self
._extract
_mpd
_formats
(
391 mpd_url
, video_id
, mpd_id
=mpd_id
, fatal
=False))
392 self
._clear
_cookies
(mpd_url
)
394 dash_manifest
= metadata
.get('metadataEmbedded')
396 formats
.extend(self
._parse
_mpd
_formats
(
397 compat_etree_fromstring(dash_manifest
), 'mpd'))
400 fmt_type
= self
._search
_regex
(
401 r
'\btype[/=](\d)', fmt
['url'],
402 'format type', default
=None)
404 fmt
['quality'] = quality(fmt_type
)
407 m3u8_url
= metadata
.get('hlsMasterPlaylistUrl')
409 formats
.extend(self
._extract
_m
3u8_formats
(
410 m3u8_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False))
411 self
._clear
_cookies
(m3u8_url
)
412 rtmp_url
= metadata
.get('rtmpUrl')
421 payment_info
= metadata
.get('paymentInfo')
423 self
.raise_no_formats('This video is paid, subscribe to download it', expected
=True)
425 info
['formats'] = formats
428 def _extract_mobile(self
, url
):
429 video_id
= self
._match
_id
(url
)
431 webpage
= self
._download
_webpage
(
432 f
'http://m.ok.ru/video/{video_id}', video_id
,
433 note
='Downloading mobile webpage')
435 error
= self
._search
_regex
(
436 r
'видео</a>\s*<div\s+class="empty">(.+?)</div>',
437 webpage
, 'error', default
=None)
439 raise ExtractorError(error
, expected
=True)
441 json_data
= self
._search
_regex
(
442 r
'data-video="(.+?)"', webpage
, 'json data')
443 json_data
= self
._parse
_json
(unescapeHTML(json_data
), video_id
) or {}
445 redirect_url
= self
._request
_webpage
(HEADRequest(
446 json_data
['videoSrc']), video_id
, 'Requesting download URL').url
447 self
._clear
_cookies
(redirect_url
)
451 'title': json_data
.get('videoName'),
452 'duration': float_or_none(json_data
.get('videoDuration'), scale
=1000),
453 'thumbnail': json_data
.get('videoPosterSrc'),
455 'format_id': 'mobile',