yt_dlp/extractor/tver.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     ExtractorError,
   4     join_nonempty,
   5     smuggle_url,
   6     str_or_none,
   7     strip_or_none,
   8     traverse_obj,
   9     update_url_query,
  10 )
  11
  12
  13 class TVerIE(InfoExtractor):
  14     _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
  15     _TESTS = [{
  16         'skip': 'videos are only available for 7 days',
  17         'url': 'https://tver.jp/episodes/ep83nf3w4p',
  18         'info_dict': {
  19             'title': '家事ヤロウ!!! 売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
  20             'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
  21             'series': '家事ヤロウ!!!',
  22             'episode': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
  23             'alt_title': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
  24             'channel': 'テレビ朝日',
  25             'id': 'ep83nf3w4p',
  26             'ext': 'mp4',
  27             'onair_label': '5月3日(火)放送分',
  28             'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！ テレビ朝日 5月3日(火)放送分',
  29         },
  30         'add_ie': ['BrightcoveNew'],
  31     }, {
  32         'url': 'https://tver.jp/corner/f0103888',
  33         'only_matching': True,
  34     }, {
  35         'url': 'https://tver.jp/lp/f0033031',
  36         'only_matching': True,
  37     }, {
  38         'url': 'https://tver.jp/series/srtxft431v',
  39         'info_dict': {
  40             'id': 'srtxft431v',
  41             'title': '名探偵コナン',
  42         },
  43         'playlist': [
  44             {
  45                 'md5': '779ffd97493ed59b0a6277ea726b389e',
  46                 'info_dict': {
  47                     'id': 'ref:conan-1137-241005',
  48                     'ext': 'mp4',
  49                     'title': '名探偵コナン #1137「行列店、味変の秘密」',
  50                     'uploader_id': '5330942432001',
  51                     'tags': [],
  52                     'channel': '読売テレビ',
  53                     'series': '名探偵コナン',
  54                     'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
  55                     'episode': '#1137「行列店、味変の秘密」',
  56                     'duration': 1469.077,
  57                     'timestamp': 1728030405,
  58                     'upload_date': '20241004',
  59                     'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
  60                     'thumbnail': r're:https://.+\.jpg',
  61                 },
  62             }],
  63     }, {
  64         'url': 'https://tver.jp/series/sru35hwdd2',
  65         'info_dict': {
  66             'id': 'sru35hwdd2',
  67             'title': '神回だけ見せます！',
  68         },
  69         'playlist_count': 11,
  70     }, {
  71         'url': 'https://tver.jp/series/srkq2shp9d',
  72         'only_matching': True,
  73     }]
  74     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
  75     _HEADERS = {'x-tver-platform-type': 'web'}
  76     _PLATFORM_QUERY = {}
  77
  78     def _real_initialize(self):
  79         session_info = self._download_json(
  80             'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
  81             None, 'Creating session', data=b'device_type=pc')
  82         self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
  83             'platform_uid': 'platform_uid',
  84             'platform_token': 'platform_token',
  85         }))
  86
  87     def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
  88         return self._download_json(
  89             f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
  90             fatal=fatal, headers=self._HEADERS, query={
  91                 **self._PLATFORM_QUERY,
  92                 **(query or {}),
  93             })
  94
  95     def _yield_episode_ids_for_series(self, series_id):
  96         seasons_info = self._download_json(
  97             f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
  98             series_id, 'Downloading seasons info', headers=self._HEADERS)
  99         for season_id in traverse_obj(
 100                 seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
 101             episodes_info = self._call_platform_api(
 102                 f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
 103             yield from traverse_obj(episodes_info, (
 104                 'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
 105
 106     def _real_extract(self, url):
 107         video_id, video_type = self._match_valid_url(url).group('id', 'type')
 108
 109         if video_type == 'series':
 110             series_info = self._call_platform_api(
 111                 f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
 112             return self.playlist_from_matches(
 113                 self._yield_episode_ids_for_series(video_id), video_id,
 114                 traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
 115                 ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
 116
 117         if video_type != 'episodes':
 118             webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
 119             video_id = self._match_id(self._search_regex(
 120                 (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
 121                 webpage, 'url regex'))
 122
 123         episode_info = self._call_platform_api(
 124             f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
 125                 'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
 126             })
 127         episode_content = traverse_obj(
 128             episode_info, ('result', 'episode', 'content')) or {}
 129
 130         version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
 131         video_info = self._download_json(
 132             f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
 133             query={'v': version}, headers={'Referer': 'https://tver.jp/'})
 134         p_id = video_info['video']['accountID']
 135         r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
 136         if not r_id:
 137             raise ExtractorError('Failed to extract reference ID for Brightcove')
 138         if not r_id.isdigit():
 139             r_id = f'ref:{r_id}'
 140
 141         episode = strip_or_none(episode_content.get('title'))
 142         series = str_or_none(episode_content.get('seriesTitle'))
 143         title = (
 144             join_nonempty(series, episode, delim=' ')
 145             or str_or_none(video_info.get('title')))
 146         provider = str_or_none(episode_content.get('productionProviderName'))
 147         onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
 148
 149         thumbnails = [
 150             {
 151                 'id': quality,
 152                 'url': update_url_query(
 153                     f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
 154                     {'v': version}),
 155                 'width': width,
 156                 'height': height,
 157             }
 158             for quality, width, height in [
 159                 ('small', 480, 270),
 160                 ('medium', 640, 360),
 161                 ('large', 960, 540),
 162                 ('xlarge', 1280, 720),
 163             ]
 164         ]
 165
 166         return {
 167             '_type': 'url_transparent',
 168             'title': title,
 169             'series': series,
 170             'episode': episode,
 171             # an another title which is considered "full title" for some viewers
 172             'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
 173             'channel': provider,
 174             'description': str_or_none(video_info.get('description')),
 175             'thumbnails': thumbnails,
 176             'url': smuggle_url(
 177                 self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
 178             'ie_key': 'BrightcoveNew',
 179         }