[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / plvideo.py
blob9351af10ae0332b9c5e06f66af9c73829744aac0
1 from .common import InfoExtractor
2 from ..utils import (
3 float_or_none,
4 int_or_none,
5 parse_iso8601,
6 parse_resolution,
7 url_or_none,
9 from ..utils.traversal import traverse_obj
class PlVideoIE(InfoExtractor):
    """Extractor for plvideo.ru regular videos (``/watch?v=``) and shorts (``/shorts/``)."""

    IE_DESC = 'Платформа'
    _VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK',
        'md5': 'fe8e18aca892b3b31f3bf492169f8a26',
        'info_dict': {
            'id': 'Y5JzUzkcQTMK',
            'ext': 'mp4',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg',
            'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo',
            'channel': 'RT en Español',
            'channel_id': 'ZH4EKqunVDvo',
            'media_type': 'video',
            'comment_count': int,
            'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'],
            'description': 'md5:a1a395d900d77a86542a91ee0826c115',
            'released_timestamp': 1715096124,
            'channel_is_verified': True,
            'like_count': int,
            'timestamp': 1715095911,
            'duration': 44.32,
            'view_count': int,
            'dislike_count': int,
            'upload_date': '20240507',
            'modified_date': '20240701',
            'channel_follower_count': int,
            'modified_timestamp': 1719824073,
        },
    }, {
        'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX',
        'md5': '7d8fa2279406c69d2fd2a6fc548a9805',
        'info_dict': {
            'id': 'S3Uo9c-VLwFX',
            'ext': 'mp4',
            'channel': 'Romaatom',
            'tags': 'count:22',
            'dislike_count': int,
            'upload_date': '20241130',
            'description': 'md5:452e6de219bf2f32bb95806c51c3b364',
            'duration': 58.433,
            'modified_date': '20241130',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg',
            'media_type': 'shorts',
            'like_count': int,
            'modified_timestamp': 1732961458,
            'channel_is_verified': True,
            'channel_id': 'erJyyTIbmUd1',
            'timestamp': 1732961355,
            'comment_count': int,
            'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе',
            'channel_follower_count': int,
            'view_count': int,
            'released_timestamp': 1732961458,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video or short referenced by ``url``.

        The video ID is matched from ``url`` and used to fetch one JSON
        document from the video API; formats, subtitles and metadata are
        all read from that document's ``item`` object.
        """
        video_id = self._match_id(url)

        video_data = self._download_json(
            f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id)

        is_live = False
        formats = []
        subtitles = {}
        automatic_captions = {}

        # One format per entry of `profiles`; entries without a valid HLS URL are skipped
        for quality, data in traverse_obj(video_data, ('item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls']))):
            formats.append({
                'format_id': quality,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
                **traverse_obj(data, {
                    'url': 'hls',
                    'fps': ('fps', {float_or_none}),
                    'aspect_ratio': ('aspectRatio', {float_or_none}),
                }),
                # The profile key doubles as the format ID and as the source of width/height
                **parse_resolution(quality),
            })

        # A livestream URL marks the item as live; its formats are added to any profile formats
        if livestream_url := traverse_obj(video_data, ('item', 'livestream', 'url', {url_or_none})):
            is_live = True
            formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True))

        # `sub_url` rather than `url`, so the method parameter is not shadowed
        for lang, sub_url in traverse_obj(video_data, ('item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1]))):
            if lang.endswith('-auto'):
                # Keys ending in '-auto' are filed as automatic captions under the bare language code
                automatic_captions.setdefault(lang.removesuffix('-auto'), []).append({
                    'url': sub_url,
                })
            else:
                subtitles.setdefault(lang, []).append({
                    'url': sub_url,
                })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
            'is_live': is_live,
            **traverse_obj(video_data, ('item', {
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
                'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}),
                # videoDuration looks like milliseconds (58433 for a sub-minute short in
                # the tests above), while `duration` is expected in seconds
                'duration': ('uploadFile', 'videoDuration', {lambda ms: float_or_none(ms, scale=1000)}),
                'channel': ('channel', 'name', {str}),
                'channel_id': ('channel', 'id', {str}),
                'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}),
                'channel_is_verified': ('channel', 'verified', {bool}),
                'tags': ('tags', ..., {str}),
                'timestamp': ('createdAt', {parse_iso8601}),
                'released_timestamp': ('publishedAt', {parse_iso8601}),
                'modified_timestamp': ('updatedAt', {parse_iso8601}),
                'view_count': ('stats', 'viewTotalCount', {int_or_none}),
                'like_count': ('stats', 'likeCount', {int_or_none}),
                'dislike_count': ('stats', 'dislikeCount', {int_or_none}),
                'comment_count': ('stats', 'commentCount', {int_or_none}),
                'media_type': ('type', {str}),
            })),
        }