yt_dlp/extractor/fc2.py

   1 import re
   2 import urllib.parse
   3
   4 from .common import InfoExtractor
   5 from ..networking import Request
   6 from ..utils import (
   7     ExtractorError,
   8     js_to_json,
   9     traverse_obj,
  10     update_url_query,
  11     urlencode_postdata,
  12     urljoin,
  13 )
  14
  15
  16 class FC2IE(InfoExtractor):
  17     _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
  18     IE_NAME = 'fc2'
  19     _NETRC_MACHINE = 'fc2'
  20     _TESTS = [{
  21         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
  22         'md5': 'a6ebe8ebe0396518689d963774a54eb7',
  23         'info_dict': {
  24             'id': '20121103kUan1KHs',
  25             'ext': 'flv',
  26             'title': 'Boxing again with Puff',
  27         },
  28     }, {
  29         'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
  30         'info_dict': {
  31             'id': '20150125cEva0hDn',
  32             'ext': 'mp4',
  33         },
  34         'params': {
  35             'username': 'ytdl@yt-dl.org',
  36             'password': '(snip)',
  37         },
  38         'skip': 'requires actual password',
  39     }, {
  40         'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
  41         'only_matching': True,
  42     }]
  43
  44     def _login(self):
  45         username, password = self._get_login_info()
  46         if username is None or password is None:
  47             return False
  48
  49         # Log in
  50         login_form_strs = {
  51             'email': username,
  52             'password': password,
  53             'done': 'video',
  54             'Submit': ' Login ',
  55         }
  56
  57         login_data = urlencode_postdata(login_form_strs)
  58         request = Request(
  59             'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
  60
  61         login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
  62         if 'mode=redirect&login=done' not in login_results:
  63             self.report_warning('unable to log in: bad username or password')
  64             return False
  65
  66         # this is also needed
  67         login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
  68         self._download_webpage(
  69             login_redir, None, note='Login redirect', errnote='Login redirect failed')
  70
  71         return True
  72
  73     def _real_extract(self, url):
  74         video_id = self._match_id(url)
  75         self._login()
  76         webpage = None
  77         if not url.startswith('fc2:'):
  78             webpage = self._download_webpage(url, video_id)
  79             self.cookiejar.clear_session_cookies()  # must clear
  80             self._login()
  81
  82         title, thumbnail, description = None, None, None
  83         if webpage is not None:
  84             title = self._html_search_regex(
  85                 (r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
  86                  r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
  87                  # there's two matches in the webpage
  88                  r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1'),
  89                 webpage,
  90                 'title', fatal=False)
  91             thumbnail = self._og_search_thumbnail(webpage)
  92             description = self._og_search_description(webpage, default=None)
  93
  94         vidplaylist = self._download_json(
  95             f'https://video.fc2.com/api/v3/videoplaylist/{video_id}?sh=1&fs=0', video_id,
  96             note='Downloading info page')
  97         vid_url = traverse_obj(vidplaylist, ('playlist', 'nq'))
  98         if not vid_url:
  99             raise ExtractorError('Unable to extract video URL')
 100         vid_url = urljoin('https://video.fc2.com/', vid_url)
 101
 102         return {
 103             'id': video_id,
 104             'title': title,
 105             'url': vid_url,
 106             'ext': 'mp4',
 107             'protocol': 'm3u8_native',
 108             'description': description,
 109             'thumbnail': thumbnail,
 110         }
 111
 112
 113 class FC2EmbedIE(InfoExtractor):
 114     _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
 115     IE_NAME = 'fc2:embed'
 116
 117     _TEST = {
 118         'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン･ブレイク%20S1-01%20マイケル%20【吹替】',
 119         'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
 120         'info_dict': {
 121             'id': '201403223kCqB3Ez',
 122             'ext': 'flv',
 123             'title': 'プリズン･ブレイク S1-01 マイケル 【吹替】',
 124             'thumbnail': r're:^https?://.*\.jpg$',
 125         },
 126     }
 127
 128     def _real_extract(self, url):
 129         mobj = self._match_valid_url(url)
 130         query = urllib.parse.parse_qs(mobj.group('query'))
 131
 132         video_id = query['i'][-1]
 133         title = query.get('tl', [f'FC2 video {video_id}'])[0]
 134
 135         sj = query.get('sj', [None])[0]
 136         thumbnail = None
 137         if sj:
 138             # See thumbnailImagePath() in ServerConst.as of flv2.swf
 139             thumbnail = 'http://video{}-thumbnail.fc2.com/up/pic/{}.jpg'.format(
 140                 sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
 141
 142         return {
 143             '_type': 'url_transparent',
 144             'ie_key': FC2IE.ie_key(),
 145             'url': f'fc2:{video_id}',
 146             'title': title,
 147             'thumbnail': thumbnail,
 148         }
 149
 150
 151 class FC2LiveIE(InfoExtractor):
 152     _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
 153     IE_NAME = 'fc2:live'
 154
 155     _TESTS = [{
 156         'url': 'https://live.fc2.com/57892267/',
 157         'info_dict': {
 158             'id': '57892267',
 159             'title': 'どこまで・・・',
 160             'uploader': 'あつあげ',
 161             'uploader_id': '57892267',
 162             'thumbnail': r're:https?://.+fc2.+',
 163         },
 164         'skip': 'livestream',
 165     }]
 166
 167     def _real_extract(self, url):
 168         video_id = self._match_id(url)
 169         webpage = self._download_webpage(f'https://live.fc2.com/{video_id}/', video_id)
 170
 171         self._set_cookie('live.fc2.com', 'js-player_size', '1')
 172
 173         member_api = self._download_json(
 174             'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({
 175                 'channel': '1',
 176                 'profile': '1',
 177                 'user': '1',
 178                 'streamid': video_id,
 179             }), note='Requesting member info')
 180
 181         control_server = self._download_json(
 182             'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data',
 183             data=urlencode_postdata({
 184                 'channel_id': video_id,
 185                 'mode': 'play',
 186                 'orz': '',
 187                 'channel_version': member_api['data']['channel_data']['version'],
 188                 'client_version': '2.1.0\n [1]',
 189                 'client_type': 'pc',
 190                 'client_app': 'browser_hls',
 191                 'ipv6': '',
 192             }), headers={'X-Requested-With': 'XMLHttpRequest'})
 193         self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])
 194
 195         ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
 196         playlist_data = None
 197
 198         ws = self._request_webpage(Request(ws_url, headers={
 199             'Origin': 'https://live.fc2.com',
 200         }), video_id, note='Fetching HLS playlist info via WebSocket')
 201
 202         self.write_debug('Sending HLS server request')
 203
 204         while True:
 205             recv = ws.recv()
 206             if not recv:
 207                 continue
 208             data = self._parse_json(recv, video_id, fatal=False)
 209             if not data or not isinstance(data, dict):
 210                 continue
 211
 212             if data.get('name') == 'connect_complete':
 213                 break
 214         ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}')
 215
 216         while True:
 217             recv = ws.recv()
 218             if not recv:
 219                 continue
 220             data = self._parse_json(recv, video_id, fatal=False)
 221             if not data or not isinstance(data, dict):
 222                 continue
 223             if data.get('name') == '_response_' and data.get('id') == 1:
 224                 self.write_debug('Goodbye')
 225                 playlist_data = data
 226                 break
 227             self.write_debug('Server said: {}{}'.format(recv[:100], '...' if len(recv) > 100 else ''))
 228
 229         if not playlist_data:
 230             raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
 231
 232         formats = []
 233         for name, playlists in playlist_data['arguments'].items():
 234             if not isinstance(playlists, list):
 235                 continue
 236             for pl in playlists:
 237                 if pl.get('status') == 0 and 'master_playlist' in pl.get('url'):
 238                     formats.extend(self._extract_m3u8_formats(
 239                         pl['url'], video_id, ext='mp4', m3u8_id=name, live=True,
 240                         headers={
 241                             'Origin': 'https://live.fc2.com',
 242                             'Referer': url,
 243                         }))
 244
 245         for fmt in formats:
 246             fmt.update({
 247                 'protocol': 'fc2_live',
 248                 'ws': ws,
 249             })
 250
 251         title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
 252         if not title:
 253             title = self._html_extract_title(webpage, 'html title', fatal=False)
 254             if title:
 255                 # remove service name in <title>
 256                 title = re.sub(r'\s+-\s+.+$', '', title)
 257         uploader = None
 258         if title:
 259             match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2))
 260             if match and all(match):
 261                 title, uploader = match
 262
 263         live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None
 264         if live_info_view:
 265             # remove jQuery code from object literal
 266             live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view)
 267             live_info_view = self._parse_json(js_to_json(live_info_view), video_id)
 268
 269         return {
 270             'id': video_id,
 271             'title': title or traverse_obj(live_info_view, 'title'),
 272             'description': self._html_search_meta(
 273                 ('og:description', 'twitter:description'),
 274                 webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'),
 275             'formats': formats,
 276             'uploader': uploader or traverse_obj(live_info_view, 'name'),
 277             'uploader_id': video_id,
 278             'thumbnail': traverse_obj(live_info_view, 'thumb'),
 279             'is_live': True,
 280         }