yt_dlp/extractor/chzzk.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     UserNotLive,
   4     float_or_none,
   5     int_or_none,
   6     parse_iso8601,
   7     url_or_none,
   8 )
   9 from ..utils.traversal import traverse_obj
  10
  11
  12 class CHZZKLiveIE(InfoExtractor):
  13     IE_NAME = 'chzzk:live'
  14     _VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
  15     _TESTS = [{
  16         'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
  17         'info_dict': {
  18             'id': 'c68b8ef525fb3d2fa146344d84991753',
  19             'ext': 'mp4',
  20             'title': str,
  21             'channel': '진짜도현',
  22             'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
  23             'channel_is_verified': False,
  24             'thumbnail': r're:^https?://.*\.jpg$',
  25             'timestamp': 1705510344,
  26             'upload_date': '20240117',
  27             'live_status': 'is_live',
  28             'view_count': int,
  29             'concurrent_view_count': int,
  30         },
  31         'skip': 'The channel is not currently live',
  32     }]
  33
  34     def _real_extract(self, url):
  35         channel_id = self._match_id(url)
  36         live_detail = self._download_json(
  37             f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id,
  38             note='Downloading channel info', errnote='Unable to download channel info')['content']
  39
  40         if live_detail.get('status') == 'CLOSE':
  41             raise UserNotLive(video_id=channel_id)
  42
  43         live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
  44
  45         thumbnails = []
  46         thumbnail_template = traverse_obj(
  47             live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
  48         if thumbnail_template and '{type}' in thumbnail_template:
  49             for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str})):
  50                 thumbnails.append({
  51                     'id': width,
  52                     'url': thumbnail_template.replace('{type}', width),
  53                     'width': int_or_none(width),
  54                 })
  55
  56         formats, subtitles = [], {}
  57         for media in traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path']))):
  58             is_low_latency = media.get('mediaId') == 'LLHLS'
  59             fmts, subs = self._extract_m3u8_formats_and_subtitles(
  60                 media['path'], channel_id, 'mp4', fatal=False, live=True,
  61                 m3u8_id='hls-ll' if is_low_latency else 'hls')
  62             for f in fmts:
  63                 if is_low_latency:
  64                     f['source_preference'] = -2
  65                 if '-afragalow.stream-audio.stream' in f['format_id']:
  66                     f['quality'] = -2
  67             formats.extend(fmts)
  68             self._merge_subtitles(subs, target=subtitles)
  69
  70         return {
  71             'id': channel_id,
  72             'is_live': True,
  73             'formats': formats,
  74             'subtitles': subtitles,
  75             'thumbnails': thumbnails,
  76             **traverse_obj(live_detail, {
  77                 'title': ('liveTitle', {str}),
  78                 'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
  79                 'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
  80                 'view_count': ('accumulateCount', {int_or_none}),
  81                 'channel': ('channel', 'channelName', {str}),
  82                 'channel_id': ('channel', 'channelId', {str}),
  83                 'channel_is_verified': ('channel', 'verifiedMark', {bool}),
  84             }),
  85         }
  86
  87
  88 class CHZZKVideoIE(InfoExtractor):
  89     IE_NAME = 'chzzk:video'
  90     _VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
  91     _TESTS = [{
  92         'url': 'https://chzzk.naver.com/video/1754',
  93         'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
  94         'info_dict': {
  95             'id': '1754',
  96             'ext': 'mp4',
  97             'title': '치지직 테스트 방송',
  98             'channel': '침착맨',
  99             'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
 100             'channel_is_verified': False,
 101             'thumbnail': r're:^https?://.*\.jpg$',
 102             'duration': 15577,
 103             'timestamp': 1702970505.417,
 104             'upload_date': '20231219',
 105             'view_count': int,
 106         },
 107         'skip': 'Replay video is expired',
 108     }, {
 109         # Manually uploaded video
 110         'url': 'https://chzzk.naver.com/video/1980',
 111         'info_dict': {
 112             'id': '1980',
 113             'ext': 'mp4',
 114             'title': '※시청주의※한번보면 잊기 힘든 영상',
 115             'channel': '라디유radiyu',
 116             'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
 117             'channel_is_verified': False,
 118             'thumbnail': r're:^https?://.*\.jpg$',
 119             'duration': 95,
 120             'timestamp': 1703102631.722,
 121             'upload_date': '20231220',
 122             'view_count': int,
 123         },
 124     }, {
 125         # Partner channel replay video
 126         'url': 'https://chzzk.naver.com/video/2458',
 127         'info_dict': {
 128             'id': '2458',
 129             'ext': 'mp4',
 130             'title': '첫 방송',
 131             'channel': '강지',
 132             'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
 133             'channel_is_verified': True,
 134             'thumbnail': r're:^https?://.*\.jpg$',
 135             'duration': 4433,
 136             'timestamp': 1703307460.214,
 137             'upload_date': '20231223',
 138             'view_count': int,
 139         },
 140     }]
 141
 142     def _real_extract(self, url):
 143         video_id = self._match_id(url)
 144         video_meta = self._download_json(
 145             f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
 146             note='Downloading video info', errnote='Unable to download video info')['content']
 147
 148         live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
 149         video_status = video_meta.get('vodStatus')
 150         if video_status == 'UPLOAD':
 151             playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
 152             formats, subtitles = self._extract_m3u8_formats_and_subtitles(
 153                 playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
 154         elif video_status == 'ABR_HLS':
 155             formats, subtitles = self._extract_mpd_formats_and_subtitles(
 156                 f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
 157                 video_id, query={
 158                     'key': video_meta['inKey'],
 159                     'env': 'real',
 160                     'lc': 'en_US',
 161                     'cpl': 'en_US',
 162                 })
 163         else:
 164             self.raise_no_formats(
 165                 f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
 166             formats, subtitles = [], {}
 167             live_status = 'post_live' if live_status == 'was_live' else None
 168
 169         return {
 170             'id': video_id,
 171             'formats': formats,
 172             'subtitles': subtitles,
 173             'live_status': live_status,
 174             **traverse_obj(video_meta, {
 175                 'title': ('videoTitle', {str}),
 176                 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
 177                 'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
 178                 'view_count': ('readCount', {int_or_none}),
 179                 'duration': ('duration', {int_or_none}),
 180                 'channel': ('channel', 'channelName', {str}),
 181                 'channel_id': ('channel', 'channelId', {str}),
 182                 'channel_is_verified': ('channel', 'verifiedMark', {bool}),
 183             }),
 184         }