yt_dlp/extractor/niconicochannelplus.py

   1 import functools
   2 import json
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     OnDemandPagedList,
   8     filter_dict,
   9     int_or_none,
  10     parse_qs,
  11     str_or_none,
  12     traverse_obj,
  13     unified_timestamp,
  14     url_or_none,
  15 )
  16
  17
  18 class NiconicoChannelPlusBaseIE(InfoExtractor):
  19     _WEBPAGE_BASE_URL = 'https://nicochannel.jp'
  20
  21     def _call_api(self, path, item_id, **kwargs):
  22         return self._download_json(
  23             f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, **kwargs)
  24
  25     def _find_fanclub_site_id(self, channel_name):
  26         fanclub_list_json = self._call_api(
  27             'content_providers/channels', item_id=f'channels/{channel_name}',
  28             note='Fetching channel list', errnote='Unable to fetch channel list',
  29         )['data']['content_providers']
  30         fanclub_id = traverse_obj(fanclub_list_json, (
  31             lambda _, v: v['domain'] == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'),
  32             get_all=False)
  33         if not fanclub_id:
  34             raise ExtractorError(f'Channel {channel_name} does not exist', expected=True)
  35         return fanclub_id
  36
  37     def _get_channel_base_info(self, fanclub_site_id):
  38         return traverse_obj(self._call_api(
  39             f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}',
  40             note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False,
  41         ), ('data', 'fanclub_site', {dict})) or {}
  42
  43     def _get_channel_user_info(self, fanclub_site_id):
  44         return traverse_obj(self._call_api(
  45             f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}',
  46             note='Fetching channel user info', errnote='Unable to fetch channel user info', fatal=False,
  47             data=json.dumps('null').encode('ascii'),
  48         ), ('data', 'fanclub_site', {dict})) or {}
  49
  50
  51 class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
  52     IE_NAME = 'NiconicoChannelPlus'
  53     IE_DESC = 'ニコニコチャンネルプラス'
  54     _VALID_URL = r'https?://nicochannel\.jp/(?P<channel>[\w.-]+)/(?:video|live)/(?P<code>sm\w+)'
  55     _TESTS = [{
  56         'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H',
  57         'info_dict': {
  58             'id': 'smsDd8EdFLcVZk9yyAhD6H7H',
  59             'title': '前田佳織里はニコ生がしたい！',
  60             'ext': 'mp4',
  61             'channel': '前田佳織里の世界攻略計画',
  62             'channel_id': 'kaorin',
  63             'channel_url': 'https://nicochannel.jp/kaorin',
  64             'live_status': 'not_live',
  65             'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path',
  66             'description': '２０２１年１１月に放送された\n「前田佳織里はニコ生がしたい！」アーカイブになります。',
  67             'timestamp': 1641360276,
  68             'duration': 4097,
  69             'comment_count': int,
  70             'view_count': int,
  71             'tags': [],
  72             'upload_date': '20220105',
  73         },
  74         'params': {
  75             'skip_download': True,
  76         },
  77     }, {
  78         # age limited video; test purpose channel.
  79         'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve',
  80         'info_dict': {
  81             'id': 'smDXbcrtyPNxLx9jc4BW69Ve',
  82             'title': 'test oshiro',
  83             'ext': 'mp4',
  84             'channel': '本番チャンネルプラステストマン',
  85             'channel_id': 'testman',
  86             'channel_url': 'https://nicochannel.jp/testman',
  87             'age_limit': 18,
  88             'live_status': 'was_live',
  89             'timestamp': 1666344616,
  90             'duration': 86465,
  91             'comment_count': int,
  92             'view_count': int,
  93             'tags': [],
  94             'upload_date': '20221021',
  95         },
  96         'params': {
  97             'skip_download': True,
  98         },
  99     }]
 100
 101     def _real_extract(self, url):
 102         content_code, channel_id = self._match_valid_url(url).group('code', 'channel')
 103         fanclub_site_id = self._find_fanclub_site_id(channel_id)
 104
 105         data_json = self._call_api(
 106             f'video_pages/{content_code}', item_id=content_code, headers={'fc_use_device': 'null'},
 107             note='Fetching video page info', errnote='Unable to fetch video page info',
 108         )['data']['video_page']
 109
 110         live_status, session_id = self._get_live_status_and_session_id(content_code, data_json)
 111
 112         release_timestamp_str = data_json.get('live_scheduled_start_at')
 113
 114         formats = []
 115
 116         if live_status == 'is_upcoming':
 117             if release_timestamp_str:
 118                 msg = f'This live event will begin at {release_timestamp_str} UTC'
 119             else:
 120                 msg = 'This event has not started yet'
 121             self.raise_no_formats(msg, expected=True, video_id=content_code)
 122         else:
 123             formats = self._extract_m3u8_formats(
 124                 # "authenticated_url" is a format string that contains "{session_id}".
 125                 m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id),
 126                 video_id=content_code)
 127
 128         return {
 129             'id': content_code,
 130             'formats': formats,
 131             '_format_sort_fields': ('tbr', 'vcodec', 'acodec'),
 132             'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'),
 133             'channel_id': channel_id,
 134             'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}',
 135             'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')),
 136             'live_status': live_status,
 137             'release_timestamp': unified_timestamp(release_timestamp_str),
 138             **traverse_obj(data_json, {
 139                 'title': ('title', {str}),
 140                 'thumbnail': ('thumbnail_url', {url_or_none}),
 141                 'description': ('description', {str}),
 142                 'timestamp': ('released_at', {unified_timestamp}),
 143                 'duration': ('active_video_filename', 'length', {int_or_none}),
 144                 'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}),
 145                 'view_count': ('video_aggregate_info', 'total_views', {int_or_none}),
 146                 'tags': ('video_tags', ..., 'tag', {str}),
 147             }),
 148             '__post_extractor': self.extract_comments(
 149                 content_code=content_code,
 150                 comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))),
 151         }
 152
 153     def _get_comments(self, content_code, comment_group_id):
 154         item_id = f'{content_code}/comments'
 155
 156         if not comment_group_id:
 157             return None
 158
 159         comment_access_token = self._call_api(
 160             f'video_pages/{content_code}/comments_user_token', item_id,
 161             note='Getting comment token', errnote='Unable to get comment token',
 162         )['data']['access_token']
 163
 164         comment_list = self._download_json(
 165             'https://comm-api.sheeta.com/messages.history', video_id=item_id,
 166             note='Fetching comments', errnote='Unable to fetch comments',
 167             headers={'Content-Type': 'application/json'},
 168             query={
 169                 'sort_direction': 'asc',
 170                 'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120,
 171             },
 172             data=json.dumps({
 173                 'token': comment_access_token,
 174                 'group_id': comment_group_id,
 175             }).encode('ascii'))
 176
 177         for comment in traverse_obj(comment_list, ...):
 178             yield traverse_obj(comment, {
 179                 'author': ('nickname', {str}),
 180                 'author_id': ('sender_id', {str_or_none}),
 181                 'id': ('id', {str_or_none}),
 182                 'text': ('message', {str}),
 183                 'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}),
 184                 'author_is_uploader': ('sender_id', {lambda x: x == '-1'}),
 185             }, get_all=False)
 186
 187     def _get_live_status_and_session_id(self, content_code, data_json):
 188         video_type = data_json.get('type')
 189         live_finished_at = data_json.get('live_finished_at')
 190
 191         payload = {}
 192         if video_type == 'vod':
 193             if live_finished_at:
 194                 live_status = 'was_live'
 195             else:
 196                 live_status = 'not_live'
 197         elif video_type == 'live':
 198             if not data_json.get('live_started_at'):
 199                 return 'is_upcoming', ''
 200
 201             if not live_finished_at:
 202                 live_status = 'is_live'
 203             else:
 204                 live_status = 'was_live'
 205                 payload = {'broadcast_type': 'dvr'}
 206
 207                 video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg'))
 208                 video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg'))
 209
 210                 self.write_debug(f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.')
 211
 212                 if not (video_allow_dvr_flg and video_convert_to_vod_flg):
 213                     raise ExtractorError(
 214                         'Live was ended, there is no video for download.', video_id=content_code, expected=True)
 215         else:
 216             raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False)
 217
 218         self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}')
 219
 220         session_id = self._call_api(
 221             f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session',
 222             data=json.dumps(payload).encode('ascii'), headers={
 223                 'Content-Type': 'application/json',
 224                 'fc_use_device': 'null',
 225                 'origin': 'https://nicochannel.jp',
 226             },
 227             note='Getting session id', errnote='Unable to get session id',
 228         )['data']['session_id']
 229
 230         return live_status, session_id
 231
 232
 233 class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE):
 234     _PAGE_SIZE = 12
 235
 236     def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page):
 237         response = self._call_api(
 238             path, item_id, query={
 239                 **query,
 240                 'page': (page + 1),
 241                 'per_page': self._PAGE_SIZE,
 242             },
 243             headers={'fc_use_device': 'null'},
 244             note=f'Getting channel info (page {page + 1})',
 245             errnote=f'Unable to get channel info (page {page + 1})')
 246
 247         for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
 248             # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
 249             yield self.url_result(
 250                 f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE)
 251
 252
 253 class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
 254     IE_NAME = 'NiconicoChannelPlus:channel:videos'
 255     IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos'
 256     _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/videos(?:\?.*)?'
 257     _TESTS = [{
 258         # query: None
 259         'url': 'https://nicochannel.jp/testman/videos',
 260         'info_dict': {
 261             'id': 'testman-videos',
 262             'title': '本番チャンネルプラステストマン-videos',
 263         },
 264         'playlist_mincount': 18,
 265     }, {
 266         # query: None
 267         'url': 'https://nicochannel.jp/testtarou/videos',
 268         'info_dict': {
 269             'id': 'testtarou-videos',
 270             'title': 'チャンネルプラステスト太郎-videos',
 271         },
 272         'playlist_mincount': 2,
 273     }, {
 274         # query: None
 275         'url': 'https://nicochannel.jp/testjirou/videos',
 276         'info_dict': {
 277             'id': 'testjirou-videos',
 278             'title': 'チャンネルプラステスト二郎-videos',
 279         },
 280         'playlist_mincount': 12,
 281     }, {
 282         # query: tag
 283         'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8',
 284         'info_dict': {
 285             'id': 'testman-videos',
 286             'title': '本番チャンネルプラステストマン-videos',
 287         },
 288         'playlist_mincount': 6,
 289     }, {
 290         # query: vodType
 291         'url': 'https://nicochannel.jp/testman/videos?vodType=1',
 292         'info_dict': {
 293             'id': 'testman-videos',
 294             'title': '本番チャンネルプラステストマン-videos',
 295         },
 296         'playlist_mincount': 18,
 297     }, {
 298         # query: sort
 299         'url': 'https://nicochannel.jp/testman/videos?sort=-released_at',
 300         'info_dict': {
 301             'id': 'testman-videos',
 302             'title': '本番チャンネルプラステストマン-videos',
 303         },
 304         'playlist_mincount': 18,
 305     }, {
 306         # query: tag, vodType
 307         'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1',
 308         'info_dict': {
 309             'id': 'testman-videos',
 310             'title': '本番チャンネルプラステストマン-videos',
 311         },
 312         'playlist_mincount': 6,
 313     }, {
 314         # query: tag, sort
 315         'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at',
 316         'info_dict': {
 317             'id': 'testman-videos',
 318             'title': '本番チャンネルプラステストマン-videos',
 319         },
 320         'playlist_mincount': 6,
 321     }, {
 322         # query: vodType, sort
 323         'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at',
 324         'info_dict': {
 325             'id': 'testman-videos',
 326             'title': '本番チャンネルプラステストマン-videos',
 327         },
 328         'playlist_mincount': 18,
 329     }, {
 330         # query: tag, vodType, sort
 331         'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at',
 332         'info_dict': {
 333             'id': 'testman-videos',
 334             'title': '本番チャンネルプラステストマン-videos',
 335         },
 336         'playlist_mincount': 6,
 337     }]
 338
 339     def _real_extract(self, url):
 340         """
 341         API parameters:
 342             sort:
 343                 -released_at         公開日が新しい順 (newest to oldest)
 344                  released_at         公開日が古い順 (oldest to newest)
 345                 -number_of_vod_views 再生数が多い順 (most play count)
 346                  number_of_vod_views コメントが多い順 (most comments)
 347             vod_type (is "vodType" in "url"):
 348                 0 すべて (all)
 349                 1 会員限定 (members only)
 350                 2 一部無料 (partially free)
 351                 3 レンタル (rental)
 352                 4 生放送アーカイブ (live archives)
 353                 5 アップロード動画 (uploaded videos)
 354         """
 355
 356         channel_id = self._match_id(url)
 357         fanclub_site_id = self._find_fanclub_site_id(channel_id)
 358         channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
 359         qs = parse_qs(url)
 360
 361         return self.playlist_result(
 362             OnDemandPagedList(
 363                 functools.partial(
 364                     self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/video_pages',
 365                     filter_dict({
 366                         'tag': traverse_obj(qs, ('tag', 0)),
 367                         'sort': traverse_obj(qs, ('sort', 0), default='-released_at'),
 368                         'vod_type': traverse_obj(qs, ('vodType', 0), default='0'),
 369                     }),
 370                     channel_id, f'{channel_id}/videos'),
 371                 self._PAGE_SIZE),
 372             playlist_id=f'{channel_id}-videos', playlist_title=f'{channel_name}-videos')
 373
 374
 375 class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE):
 376     IE_NAME = 'NiconicoChannelPlus:channel:lives'
 377     IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives'
 378     _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/lives'
 379     _TESTS = [{
 380         'url': 'https://nicochannel.jp/testman/lives',
 381         'info_dict': {
 382             'id': 'testman-lives',
 383             'title': '本番チャンネルプラステストマン-lives',
 384         },
 385         'playlist_mincount': 18,
 386     }, {
 387         'url': 'https://nicochannel.jp/testtarou/lives',
 388         'info_dict': {
 389             'id': 'testtarou-lives',
 390             'title': 'チャンネルプラステスト太郎-lives',
 391         },
 392         'playlist_mincount': 2,
 393     }, {
 394         'url': 'https://nicochannel.jp/testjirou/lives',
 395         'info_dict': {
 396             'id': 'testjirou-lives',
 397             'title': 'チャンネルプラステスト二郎-lives',
 398         },
 399         'playlist_mincount': 6,
 400     }]
 401
 402     def _real_extract(self, url):
 403         """
 404         API parameters:
 405             live_type:
 406                 1 放送中 (on air)
 407                 2 放送予定 (scheduled live streams, oldest to newest)
 408                 3 過去の放送 - すべて (all ended live streams, newest to oldest)
 409                 4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest)
 410             We use "4" instead of "3" because some recently ended live streams could not be downloaded.
 411         """
 412
 413         channel_id = self._match_id(url)
 414         fanclub_site_id = self._find_fanclub_site_id(channel_id)
 415         channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
 416
 417         return self.playlist_result(
 418             OnDemandPagedList(
 419                 functools.partial(
 420                     self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/live_pages',
 421                     {
 422                         'live_type': 4,
 423                     },
 424                     channel_id, f'{channel_id}/lives'),
 425                 self._PAGE_SIZE),
 426             playlist_id=f'{channel_id}-lives', playlist_title=f'{channel_name}-lives')