yt_dlp/downloader/youtube_live_chat.py

   1 import json
   2 import time
   3
   4 from .fragment import FragmentFD
   5 from ..networking.exceptions import HTTPError
   6 from ..utils import (
   7     RegexNotFoundError,
   8     RetryManager,
   9     dict_get,
  10     int_or_none,
  11     try_get,
  12 )
  13 from ..utils.networking import HTTPHeaderDict
  14
  15
  16 class YoutubeLiveChatFD(FragmentFD):
  17     """ Downloads YouTube live chats fragment by fragment """
  18
  19     def real_download(self, filename, info_dict):
  20         video_id = info_dict['video_id']
  21         self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
  22         if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
  23             self.report_warning('Live chat download runs until the livestream ends. '
  24                                 'If you wish to download the video simultaneously, run a separate yt-dlp instance')
  25
  26         test = self.params.get('test', False)
  27
  28         ctx = {
  29             'filename': filename,
  30             'live': True,
  31             'total_frags': None,
  32         }
  33
  34         from ..extractor.youtube import YoutubeBaseInfoExtractor
  35
  36         ie = YoutubeBaseInfoExtractor(self.ydl)
  37
  38         start_time = int(time.time() * 1000)
  39
  40         def dl_fragment(url, data=None, headers=None):
  41             http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
  42             return self._download_fragment(ctx, url, info_dict, http_headers, data)
  43
  44         def parse_actions_replay(live_chat_continuation):
  45             offset = continuation_id = click_tracking_params = None
  46             processed_fragment = bytearray()
  47             for action in live_chat_continuation.get('actions', []):
  48                 if 'replayChatItemAction' in action:
  49                     replay_chat_item_action = action['replayChatItemAction']
  50                     offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
  51                 processed_fragment.extend(
  52                     json.dumps(action, ensure_ascii=False).encode() + b'\n')
  53             if offset is not None:
  54                 continuation = try_get(
  55                     live_chat_continuation,
  56                     lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
  57                 if continuation:
  58                     continuation_id = continuation.get('continuation')
  59                     click_tracking_params = continuation.get('clickTrackingParams')
  60             self._append_fragment(ctx, processed_fragment)
  61             return continuation_id, offset, click_tracking_params
  62
  63         def try_refresh_replay_beginning(live_chat_continuation):
  64             # choose the second option that contains the unfiltered live chat replay
  65             refresh_continuation = try_get(
  66                 live_chat_continuation,
  67                 lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
  68             if refresh_continuation:
  69                 # no data yet but required to call _append_fragment
  70                 self._append_fragment(ctx, b'')
  71                 refresh_continuation_id = refresh_continuation.get('continuation')
  72                 offset = 0
  73                 click_tracking_params = refresh_continuation.get('trackingParams')
  74                 return refresh_continuation_id, offset, click_tracking_params
  75             return parse_actions_replay(live_chat_continuation)
  76
  77         live_offset = 0
  78
  79         def parse_actions_live(live_chat_continuation):
  80             nonlocal live_offset
  81             continuation_id = click_tracking_params = None
  82             processed_fragment = bytearray()
  83             for action in live_chat_continuation.get('actions', []):
  84                 timestamp = self.parse_live_timestamp(action)
  85                 if timestamp is not None:
  86                     live_offset = timestamp - start_time
  87                 # compatibility with replay format
  88                 pseudo_action = {
  89                     'replayChatItemAction': {'actions': [action]},
  90                     'videoOffsetTimeMsec': str(live_offset),
  91                     'isLive': True,
  92                 }
  93                 processed_fragment.extend(
  94                     json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
  95             continuation_data_getters = [
  96                 lambda x: x['continuations'][0]['invalidationContinuationData'],
  97                 lambda x: x['continuations'][0]['timedContinuationData'],
  98             ]
  99             continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
 100             if continuation_data:
 101                 continuation_id = continuation_data.get('continuation')
 102                 click_tracking_params = continuation_data.get('clickTrackingParams')
 103                 timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
 104                 if timeout_ms is not None:
 105                     time.sleep(timeout_ms / 1000)
 106             self._append_fragment(ctx, processed_fragment)
 107             return continuation_id, live_offset, click_tracking_params
 108
 109         def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
 110             for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
 111                 try:
 112                     success = dl_fragment(url, request_data, headers)
 113                     if not success:
 114                         return False, None, None, None
 115                     raw_fragment = self._read_fragment(ctx)
 116                     try:
 117                         data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
 118                     except RegexNotFoundError:
 119                         data = None
 120                     if not data:
 121                         data = json.loads(raw_fragment)
 122                     live_chat_continuation = try_get(
 123                         data,
 124                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
 125
 126                     func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
 127                             or frag_index == 1 and try_refresh_replay_beginning
 128                             or parse_actions_replay)
 129                     return (True, *func(live_chat_continuation))
 130                 except HTTPError as err:
 131                     retry.error = err
 132                     continue
 133             return False, None, None, None
 134
 135         self._prepare_and_start_frag_download(ctx, info_dict)
 136
 137         success = dl_fragment(info_dict['url'])
 138         if not success:
 139             return False
 140         raw_fragment = self._read_fragment(ctx)
 141         try:
 142             data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
 143         except RegexNotFoundError:
 144             return False
 145         continuation_id = try_get(
 146             data,
 147             lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
 148         # no data yet but required to call _append_fragment
 149         self._append_fragment(ctx, b'')
 150
 151         ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
 152
 153         if not ytcfg:
 154             return False
 155         api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
 156         innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
 157         if not api_key or not innertube_context:
 158             return False
 159         visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
 160         if info_dict['protocol'] == 'youtube_live_chat_replay':
 161             url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
 162             chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
 163         elif info_dict['protocol'] == 'youtube_live_chat':
 164             url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
 165             chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
 166
 167         frag_index = offset = 0
 168         click_tracking_params = None
 169         while continuation_id is not None:
 170             frag_index += 1
 171             request_data = {
 172                 'context': innertube_context,
 173                 'continuation': continuation_id,
 174             }
 175             if frag_index > 1:
 176                 request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
 177                 if click_tracking_params:
 178                     request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
 179                 headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
 180                 headers.update({'content-type': 'application/json'})
 181                 fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
 182                 success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
 183                     url, frag_index, fragment_request_data, headers)
 184             else:
 185                 success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
 186                     chat_page_url, frag_index)
 187             if not success:
 188                 return False
 189             if test:
 190                 break
 191
 192         return self._finish_frag_download(ctx, info_dict)
 193
 194     @staticmethod
 195     def parse_live_timestamp(action):
 196         action_content = dict_get(
 197             action,
 198             ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
 199         if not isinstance(action_content, dict):
 200             return None
 201         item = dict_get(action_content, ['item', 'bannerRenderer'])
 202         if not isinstance(item, dict):
 203             return None
 204         renderer = dict_get(item, [
 205             # text
 206             'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
 207             'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
 208             # ticker
 209             'liveChatTickerPaidMessageItemRenderer',
 210             'liveChatTickerSponsorItemRenderer',
 211             # banner
 212             'liveChatBannerRenderer',
 213         ])
 214         if not isinstance(renderer, dict):
 215             return None
 216         parent_item_getters = [
 217             lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
 218             lambda x: x['contents'],
 219         ]
 220         parent_item = try_get(renderer, parent_item_getters, dict)
 221         if parent_item:
 222             renderer = dict_get(parent_item, [
 223                 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
 224                 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
 225             ])
 226             if not isinstance(renderer, dict):
 227                 return None
 228         return int_or_none(renderer.get('timestampUsec'), 1000)