yt_dlp/extractor/chzzk.py

   1 import functools
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     UserNotLive,
   6     float_or_none,
   7     int_or_none,
   8     parse_iso8601,
   9     url_or_none,
  10 )
  11 from ..utils.traversal import traverse_obj
  12
  13
  14 class CHZZKLiveIE(InfoExtractor):
  15     IE_NAME = 'chzzk:live'
  16     _VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
  17     _TESTS = [{
  18         'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
  19         'info_dict': {
  20             'id': 'c68b8ef525fb3d2fa146344d84991753',
  21             'ext': 'mp4',
  22             'title': str,
  23             'channel': '진짜도현',
  24             'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
  25             'channel_is_verified': False,
  26             'thumbnail': r're:^https?://.*\.jpg$',
  27             'timestamp': 1705510344,
  28             'upload_date': '20240117',
  29             'live_status': 'is_live',
  30             'view_count': int,
  31             'concurrent_view_count': int,
  32         },
  33         'skip': 'The channel is not currently live',
  34     }]
  35
  36     def _real_extract(self, url):
  37         channel_id = self._match_id(url)
  38         live_detail = self._download_json(
  39             f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id,
  40             note='Downloading channel info', errnote='Unable to download channel info')['content']
  41
  42         if live_detail.get('status') == 'CLOSE':
  43             raise UserNotLive(video_id=channel_id)
  44
  45         live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
  46
  47         thumbnails = []
  48         thumbnail_template = traverse_obj(
  49             live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
  50         if thumbnail_template and '{type}' in thumbnail_template:
  51             for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str})):
  52                 thumbnails.append({
  53                     'id': width,
  54                     'url': thumbnail_template.replace('{type}', width),
  55                     'width': int_or_none(width),
  56                 })
  57
  58         formats, subtitles = [], {}
  59         for media in traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path']))):
  60             is_low_latency = media.get('mediaId') == 'LLHLS'
  61             fmts, subs = self._extract_m3u8_formats_and_subtitles(
  62                 media['path'], channel_id, 'mp4', fatal=False, live=True,
  63                 m3u8_id='hls-ll' if is_low_latency else 'hls')
  64             for f in fmts:
  65                 if is_low_latency:
  66                     f['source_preference'] = -2
  67                 if '-afragalow.stream-audio.stream' in f['format_id']:
  68                     f['quality'] = -2
  69             formats.extend(fmts)
  70             self._merge_subtitles(subs, target=subtitles)
  71
  72         return {
  73             'id': channel_id,
  74             'is_live': True,
  75             'formats': formats,
  76             'subtitles': subtitles,
  77             'thumbnails': thumbnails,
  78             **traverse_obj(live_detail, {
  79                 'title': ('liveTitle', {str}),
  80                 'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
  81                 'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
  82                 'view_count': ('accumulateCount', {int_or_none}),
  83                 'channel': ('channel', 'channelName', {str}),
  84                 'channel_id': ('channel', 'channelId', {str}),
  85                 'channel_is_verified': ('channel', 'verifiedMark', {bool}),
  86             }),
  87         }
  88
  89
  90 class CHZZKVideoIE(InfoExtractor):
  91     IE_NAME = 'chzzk:video'
  92     _VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
  93     _TESTS = [{
  94         'url': 'https://chzzk.naver.com/video/1754',
  95         'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
  96         'info_dict': {
  97             'id': '1754',
  98             'ext': 'mp4',
  99             'title': '치지직 테스트 방송',
 100             'channel': '침착맨',
 101             'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
 102             'channel_is_verified': False,
 103             'thumbnail': r're:^https?://.*\.jpg$',
 104             'duration': 15577,
 105             'timestamp': 1702970505.417,
 106             'upload_date': '20231219',
 107             'view_count': int,
 108         },
 109         'skip': 'Replay video is expired',
 110     }, {
 111         # Manually uploaded video
 112         'url': 'https://chzzk.naver.com/video/1980',
 113         'info_dict': {
 114             'id': '1980',
 115             'ext': 'mp4',
 116             'title': '※시청주의※한번보면 잊기 힘든 영상',
 117             'channel': '라디유radiyu',
 118             'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
 119             'channel_is_verified': False,
 120             'thumbnail': r're:^https?://.*\.jpg$',
 121             'duration': 95,
 122             'timestamp': 1703102631.722,
 123             'upload_date': '20231220',
 124             'view_count': int,
 125         },
 126     }, {
 127         # Partner channel replay video
 128         'url': 'https://chzzk.naver.com/video/2458',
 129         'info_dict': {
 130             'id': '2458',
 131             'ext': 'mp4',
 132             'title': '첫 방송',
 133             'channel': '강지',
 134             'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
 135             'channel_is_verified': True,
 136             'thumbnail': r're:^https?://.*\.jpg$',
 137             'duration': 4433,
 138             'timestamp': 1703307460.214,
 139             'upload_date': '20231223',
 140             'view_count': int,
 141         },
 142     }]
 143
 144     def _real_extract(self, url):
 145         video_id = self._match_id(url)
 146         video_meta = self._download_json(
 147             f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
 148             note='Downloading video info', errnote='Unable to download video info')['content']
 149         formats, subtitles = self._extract_mpd_formats_and_subtitles(
 150             f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
 151             query={
 152                 'key': video_meta['inKey'],
 153                 'env': 'real',
 154                 'lc': 'en_US',
 155                 'cpl': 'en_US',
 156             }, note='Downloading video playback', errnote='Unable to download video playback')
 157
 158         return {
 159             'id': video_id,
 160             'formats': formats,
 161             'subtitles': subtitles,
 162             **traverse_obj(video_meta, {
 163                 'title': ('videoTitle', {str}),
 164                 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
 165                 'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
 166                 'view_count': ('readCount', {int_or_none}),
 167                 'duration': ('duration', {int_or_none}),
 168                 'channel': ('channel', 'channelName', {str}),
 169                 'channel_id': ('channel', 'channelId', {str}),
 170                 'channel_is_verified': ('channel', 'verifiedMark', {bool}),
 171             }),
 172         }