yt_dlp/extractor/mocha.py

   1 from .common import InfoExtractor
   2 from ..utils import int_or_none, traverse_obj
   3
   4
   5 class MochaVideoIE(InfoExtractor):
   6     _VALID_URL = r'https?://video\.mocha\.com\.vn/(?P<video_slug>[\w-]+)'
   7     _TESTS = [{
   8         'url': 'http://video.mocha.com.vn/chuyen-meo-gia-su-tu-thong-diep-cuoc-song-v18694039',
   9         'info_dict': {
  10             'id': '18694039',
  11             'title': 'Chuyện mèo giả sư tử | Thông điệp cuộc sống',
  12             'ext': 'mp4',
  13             'view_count': int,
  14             'like_count': int,
  15             'dislike_count': int,
  16             'display_id': 'chuyen-meo-gia-su-tu-thong-diep-cuoc-song',
  17             'thumbnail': 'http://mcvideomd1fr.keeng.net/playnow/images/20220505/ad0a055d-2f69-42ca-b888-4790041fe6bc_640x480.jpg',
  18             'description': '',
  19             'duration': 70,
  20             'timestamp': 1652254203,
  21             'upload_date': '20220511',
  22             'comment_count': int,
  23             'categories': ['Kids'],
  24         },
  25     }]
  26
  27     def _real_extract(self, url):
  28         video_slug = self._match_valid_url(url).group('video_slug')
  29         json_data = self._download_json(
  30             'http://apivideo.mocha.com.vn:8081/onMediaBackendBiz/mochavideo/getVideoDetail',
  31             video_slug, query={'url': url, 'token': ''})['data']['videoDetail']
  32         video_id = str(json_data['id'])
  33         video_urls = (json_data.get('list_resolution') or []) + [json_data.get('original_path')]
  34
  35         formats, subtitles = [], {}
  36         for video in video_urls:
  37             if isinstance(video, str):
  38                 formats.extend([{'url': video, 'ext': 'mp4'}])
  39             else:
  40                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
  41                     video.get('video_path'), video_id, ext='mp4')
  42                 formats.extend(fmts)
  43                 self._merge_subtitles(subs, target=subtitles)
  44
  45         return {
  46             'id': video_id,
  47             'display_id': json_data.get('slug') or video_slug,
  48             'title': json_data.get('name'),
  49             'formats': formats,
  50             'subtitles': subtitles,
  51             'description': json_data.get('description'),
  52             'duration': json_data.get('durationS'),
  53             'view_count': json_data.get('total_view'),
  54             'like_count': json_data.get('total_like'),
  55             'dislike_count': json_data.get('total_unlike'),
  56             'thumbnail': json_data.get('image_path_thumb'),
  57             'timestamp': int_or_none(json_data.get('publish_time'), scale=1000),
  58             'is_live': json_data.get('isLive'),
  59             'channel': traverse_obj(json_data, ('channels', '0', 'name')),
  60             'channel_id': traverse_obj(json_data, ('channels', '0', 'id')),
  61             'channel_follower_count': traverse_obj(json_data, ('channels', '0', 'numfollow')),
  62             'categories': traverse_obj(json_data, ('categories', ..., 'categoryname')),
  63             'comment_count': json_data.get('total_comment'),
  64         }