[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / tvigle.py
blob6b87ecac745b410cf1c750685541eb87404534f4
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 float_or_none,
5 int_or_none,
6 parse_age_limit,
7 try_get,
8 url_or_none,
class TvigleIE(InfoExtractor):
    """Extractor for tvigle.ru video pages and cloud.tvigle.ru player URLs."""

    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
    # Two URL shapes are accepted: a site page, whose last path segment is
    # captured as ``display_id``, or a cloud player URL, whose numeric
    # segment is captured as ``id``. Only one of the groups is ever set.
    _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1']

    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['RU']

    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/video/sokrat/',
            'info_dict': {
                'id': '1848932',
                'display_id': 'sokrat',
                'ext': 'mp4',
                'title': 'Сократ',
                'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
                'duration': 6586,
                'age_limit': 12,
            },
            'skip': 'georestricted',
        },
        {
            'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
            'info_dict': {
                'id': '5142516',
                'ext': 'flv',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                'duration': 186.080,
                'age_limit': 0,
            },
            'skip': 'georestricted',
        }, {
            'url': 'https://cloud.tvigle.ru/video/5267604/',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Resolve the video id, query the play API and build the info dict.

        For site page URLs the numeric id is scraped from the page markup;
        for cloud player URLs it is taken from the URL itself. The first
        playlist item of the API response supplies the metadata and the
        available formats.

        Raises ExtractorError (or a geo-restriction error) when the API
        item carries an ``errorMessage`` and no videos.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        if not video_id:
            # Site page URL: the numeric id only appears in the page markup.
            webpage = self._download_webpage(url, display_id)
            video_id = self._html_search_regex(
                (r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
                 r'cloudId\s*=\s*["\'](\d+)',
                 r'class="video-preview current_playing" id="(\d+)"'),
                webpage, 'video id')

        # display_id is None for cloud player URLs, so fall back to the
        # numeric id for the identifier passed alongside the request.
        video_data = self._download_json(
            f'http://cloud.tvigle.ru/api/play/video/{video_id}/',
            display_id or video_id)

        item = video_data['playlist']['items'][0]

        videos = item.get('videos')

        error_message = item.get('errorMessage')
        if not videos and error_message:
            if item.get('isGeoBlocked') is True:
                self.raise_geo_restricted(
                    msg=error_message, countries=self._GEO_COUNTRIES)
            else:
                raise ExtractorError(
                    f'{self.IE_NAME} returned error: {error_message}',
                    expected=True)

        title = item['title']
        description = item.get('description')
        thumbnail = item.get('thumbnail')
        duration = float_or_none(item.get('durationMilliseconds'), 1000)
        age_limit = parse_age_limit(item.get('ageRestrictions'))

        # A missing or malformed ``videos`` entry without an error message
        # must not crash with KeyError/AttributeError; treat it as "no
        # formats" and return an empty list instead.
        if not isinstance(videos, dict):
            videos = {}

        formats = []
        for vcodec, url_or_fmts in videos.items():
            if vcodec == 'hls':
                m3u8_url = url_or_none(url_or_fmts)
                if not m3u8_url:
                    continue
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif vcodec == 'dash':
                mpd_url = url_or_none(url_or_fmts)
                if not mpd_url:
                    continue
                formats.extend(self._extract_mpd_formats(
                    mpd_url, video_id, mpd_id='dash', fatal=False))
            else:
                # Any other key is handled as a codec name mapping format
                # ids to direct URLs; non-dict values are skipped.
                if not isinstance(url_or_fmts, dict):
                    continue
                for format_id, video_url in url_or_fmts.items():
                    if format_id == 'm3u8':
                        continue
                    video_url = url_or_none(video_url)
                    if not video_url:
                        continue
                    # Format ids such as "720p" encode the frame height.
                    height = self._search_regex(
                        r'^(\d+)[pP]$', format_id, 'height', default=None)
                    filesize = int_or_none(try_get(
                        item, lambda x: x['video_files_size'][vcodec][format_id]))
                    formats.append({
                        'url': video_url,
                        'format_id': f'{vcodec}-{format_id}',
                        'vcodec': vcodec,
                        'height': int_or_none(height),
                        'filesize': filesize,
                    })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }