yt_dlp/extractor/kinopoisk.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     dict_get,
   4     int_or_none,
   5 )
   6
   7
   8 class KinoPoiskIE(InfoExtractor):
   9     _GEO_COUNTRIES = ['RU']
  10     _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
  11     _TESTS = [{
  12         'url': 'https://www.kinopoisk.ru/film/81041/watch/',
  13         'md5': '4f71c80baea10dfa54a837a46111d326',
  14         'info_dict': {
  15             'id': '81041',
  16             'ext': 'mp4',
  17             'title': 'Алеша попович и тугарин змей',
  18             'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
  19             'thumbnail': r're:^https?://.*',
  20             'duration': 4533,
  21             'age_limit': 12,
  22         },
  23     }, {
  24         'url': 'https://www.kinopoisk.ru/film/81041',
  25         'only_matching': True,
  26     }]
  27
  28     def _real_extract(self, url):
  29         video_id = self._match_id(url)
  30
  31         webpage = self._download_webpage(
  32             'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
  33             query={'kpId': video_id})
  34
  35         data = self._parse_json(
  36             self._search_regex(
  37                 r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
  38                 webpage, 'data'),
  39             video_id)['models']
  40
  41         film = data['filmStatus']
  42         title = film.get('title') or film['originalTitle']
  43
  44         formats = self._extract_m3u8_formats(
  45             data['playlistEntity']['uri'], video_id, 'mp4',
  46             entry_protocol='m3u8_native', m3u8_id='hls')
  47
  48         description = dict_get(
  49             film, ('descriptscription', 'description',
  50                    'shortDescriptscription', 'shortDescription'))
  51         thumbnail = film.get('coverUrl') or film.get('posterUrl')
  52         duration = int_or_none(film.get('duration'))
  53         age_limit = int_or_none(film.get('restrictionAge'))
  54
  55         return {
  56             'id': video_id,
  57             'title': title,
  58             'description': description,
  59             'thumbnail': thumbnail,
  60             'duration': duration,
  61             'age_limit': age_limit,
  62             'formats': formats,
  63         }