yt_dlp/extractor/plvideo.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     float_or_none,
   4     int_or_none,
   5     parse_iso8601,
   6     parse_resolution,
   7     url_or_none,
   8 )
   9 from ..utils.traversal import traverse_obj
  10
  11
  12 class PlVideoIE(InfoExtractor):
  13     IE_DESC = 'Платформа'
  14     _VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)'
  15     _TESTS = [{
  16         'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK',
  17         'md5': 'fe8e18aca892b3b31f3bf492169f8a26',
  18         'info_dict': {
  19             'id': 'Y5JzUzkcQTMK',
  20             'ext': 'mp4',
  21             'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg',
  22             'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo',
  23             'channel': 'RT en Español',
  24             'channel_id': 'ZH4EKqunVDvo',
  25             'media_type': 'video',
  26             'comment_count': int,
  27             'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'],
  28             'description': 'md5:a1a395d900d77a86542a91ee0826c115',
  29             'released_timestamp': 1715096124,
  30             'channel_is_verified': True,
  31             'like_count': int,
  32             'timestamp': 1715095911,
  33             'duration': 44320,
  34             'view_count': int,
  35             'dislike_count': int,
  36             'upload_date': '20240507',
  37             'modified_date': '20240701',
  38             'channel_follower_count': int,
  39             'modified_timestamp': 1719824073,
  40         },
  41     }, {
  42         'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX',
  43         'md5': '7d8fa2279406c69d2fd2a6fc548a9805',
  44         'info_dict': {
  45             'id': 'S3Uo9c-VLwFX',
  46             'ext': 'mp4',
  47             'channel': 'Romaatom',
  48             'tags': 'count:22',
  49             'dislike_count': int,
  50             'upload_date': '20241130',
  51             'description': 'md5:452e6de219bf2f32bb95806c51c3b364',
  52             'duration': 58433,
  53             'modified_date': '20241130',
  54             'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg',
  55             'media_type': 'shorts',
  56             'like_count': int,
  57             'modified_timestamp': 1732961458,
  58             'channel_is_verified': True,
  59             'channel_id': 'erJyyTIbmUd1',
  60             'timestamp': 1732961355,
  61             'comment_count': int,
  62             'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе',
  63             'channel_follower_count': int,
  64             'view_count': int,
  65             'released_timestamp': 1732961458,
  66         },
  67     }]
  68
  69     def _real_extract(self, url):
  70         video_id = self._match_id(url)
  71
  72         video_data = self._download_json(
  73             f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id)
  74
  75         is_live = False
  76         formats = []
  77         subtitles = {}
  78         automatic_captions = {}
  79         for quality, data in traverse_obj(video_data, ('item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls']))):
  80             formats.append({
  81                 'format_id': quality,
  82                 'ext': 'mp4',
  83                 'protocol': 'm3u8_native',
  84                 **traverse_obj(data, {
  85                     'url': 'hls',
  86                     'fps': ('fps', {float_or_none}),
  87                     'aspect_ratio': ('aspectRatio', {float_or_none}),
  88                 }),
  89                 **parse_resolution(quality),
  90             })
  91         if livestream_url := traverse_obj(video_data, ('item', 'livestream', 'url', {url_or_none})):
  92             is_live = True
  93             formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True))
  94         for lang, url in traverse_obj(video_data, ('item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1]))):
  95             if lang.endswith('-auto'):
  96                 automatic_captions.setdefault(lang[:-5], []).append({
  97                     'url': url,
  98                 })
  99             else:
 100                 subtitles.setdefault(lang, []).append({
 101                     'url': url,
 102                 })
 103
 104         return {
 105             'id': video_id,
 106             'formats': formats,
 107             'subtitles': subtitles,
 108             'automatic_captions': automatic_captions,
 109             'is_live': is_live,
 110             **traverse_obj(video_data, ('item', {
 111                 'id': ('id', {str}),
 112                 'title': ('title', {str}),
 113                 'description': ('description', {str}),
 114                 'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}),
 115                 'duration': ('uploadFile', 'videoDuration', {int_or_none}),
 116                 'channel': ('channel', 'name', {str}),
 117                 'channel_id': ('channel', 'id', {str}),
 118                 'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}),
 119                 'channel_is_verified': ('channel', 'verified', {bool}),
 120                 'tags': ('tags', ..., {str}),
 121                 'timestamp': ('createdAt', {parse_iso8601}),
 122                 'released_timestamp': ('publishedAt', {parse_iso8601}),
 123                 'modified_timestamp': ('updatedAt', {parse_iso8601}),
 124                 'view_count': ('stats', 'viewTotalCount', {int_or_none}),
 125                 'like_count': ('stats', 'likeCount', {int_or_none}),
 126                 'dislike_count': ('stats', 'dislikeCount', {int_or_none}),
 127                 'comment_count': ('stats', 'commentCount', {int_or_none}),
 128                 'media_type': ('type', {str}),
 129             })),
 130         }