yt_dlp/extractor/playsuisse.py

   1 import json
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     int_or_none,
   7     parse_qs,
   8     traverse_obj,
   9     update_url_query,
  10     urlencode_postdata,
  11 )
  12
  13
  14 class PlaySuisseIE(InfoExtractor):
  15     _NETRC_MACHINE = 'playsuisse'
  16     _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
  17     _TESTS = [
  18         {
  19             # Old URL
  20             'url': 'https://www.playsuisse.ch/watch/763211/0',
  21             'only_matching': True,
  22         },
  23         {
  24             # episode in a series
  25             'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
  26             'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
  27             'info_dict': {
  28                 'id': '763211',
  29                 'ext': 'mp4',
  30                 'title': 'Knochen',
  31                 'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
  32                 'duration': 3344,
  33                 'series': 'Wilder',
  34                 'season': 'Season 1',
  35                 'season_number': 1,
  36                 'episode': 'Knochen',
  37                 'episode_number': 1,
  38                 'thumbnail': 're:https://playsuisse-img.akamaized.net/',
  39             },
  40         }, {
  41             # film
  42             'url': 'https://www.playsuisse.ch/watch/808675',
  43             'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
  44             'info_dict': {
  45                 'id': '808675',
  46                 'ext': 'mp4',
  47                 'title': 'Der Läufer',
  48                 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
  49                 'duration': 5280,
  50                 'thumbnail': 're:https://playsuisse-img.akamaized.net/',
  51             },
  52         }, {
  53             # series (treated as a playlist)
  54             'url': 'https://www.playsuisse.ch/detail/1115687',
  55             'info_dict': {
  56                 'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
  57                 'id': '1115687',
  58                 'series': 'They all came out to Montreux',
  59                 'title': 'They all came out to Montreux',
  60             },
  61             'playlist': [{
  62                 'info_dict': {
  63                     'description': 'md5:f2462744834b959a31adc6292380cda2',
  64                     'duration': 3180,
  65                     'episode': 'Folge 1',
  66                     'episode_number': 1,
  67                     'id': '1112663',
  68                     'season': 'Season 1',
  69                     'season_number': 1,
  70                     'series': 'They all came out to Montreux',
  71                     'thumbnail': 're:https://playsuisse-img.akamaized.net/',
  72                     'title': 'Folge 1',
  73                     'ext': 'mp4',
  74                 },
  75             }, {
  76                 'info_dict': {
  77                     'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27',
  78                     'duration': 2935,
  79                     'episode': 'Folge 2',
  80                     'episode_number': 2,
  81                     'id': '1112661',
  82                     'season': 'Season 1',
  83                     'season_number': 1,
  84                     'series': 'They all came out to Montreux',
  85                     'thumbnail': 're:https://playsuisse-img.akamaized.net/',
  86                     'title': 'Folge 2',
  87                     'ext': 'mp4',
  88                 },
  89             }, {
  90                 'info_dict': {
  91                     'description': 'md5:14a93a3356b2492a8f786ab2227ef602',
  92                     'duration': 2994,
  93                     'episode': 'Folge 3',
  94                     'episode_number': 3,
  95                     'id': '1112664',
  96                     'season': 'Season 1',
  97                     'season_number': 1,
  98                     'series': 'They all came out to Montreux',
  99                     'thumbnail': 're:https://playsuisse-img.akamaized.net/',
 100                     'title': 'Folge 3',
 101                     'ext': 'mp4',
 102                 },
 103             }],
 104         },
 105     ]
 106
 107     _GRAPHQL_QUERY = '''
 108         query AssetWatch($assetId: ID!) {
 109             assetV2(id: $assetId) {
 110                 ...Asset
 111                 episodes {
 112                     ...Asset
 113                 }
 114             }
 115         }
 116         fragment Asset on AssetV2 {
 117             id
 118             name
 119             description
 120             duration
 121             episodeNumber
 122             seasonNumber
 123             seriesName
 124             medias {
 125                 type
 126                 url
 127             }
 128             thumbnail16x9 {
 129                 ...ImageDetails
 130             }
 131             thumbnail2x3 {
 132                 ...ImageDetails
 133             }
 134             thumbnail16x9WithTitle {
 135                 ...ImageDetails
 136             }
 137             thumbnail2x3WithTitle {
 138                 ...ImageDetails
 139             }
 140         }
 141         fragment ImageDetails on AssetImage {
 142             id
 143             url
 144         }'''
 145     _LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
 146     _LOGIN_PATH = 'B2C_1A__SignInV2'
 147     _ID_TOKEN = None
 148
 149     def _perform_login(self, username, password):
 150         login_page = self._download_webpage(
 151             'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
 152             query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
 153         settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)
 154
 155         csrf_token = settings['csrf']
 156         query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}
 157
 158         status = traverse_obj(self._download_json(
 159             f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in',
 160             query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({
 161                 'request_type': 'RESPONSE',
 162                 'signInName': username,
 163                 'password': password,
 164             }), expected_status=400), ('status', {int_or_none}))
 165         if status == 400:
 166             raise ExtractorError('Invalid username or password', expected=True)
 167
 168         urlh = self._request_webpage(
 169             f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed',
 170             None, 'Downloading ID token', query={
 171                 'rememberMe': 'false',
 172                 'csrf_token': csrf_token,
 173                 **query,
 174                 'diags': '',
 175             })
 176
 177         self._ID_TOKEN = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
 178         if not self._ID_TOKEN:
 179             raise ExtractorError('Login failed')
 180
 181     def _get_media_data(self, media_id):
 182         # NOTE In the web app, the "locale" header is used to switch between languages,
 183         # However this doesn't seem to take effect when passing the header here.
 184         response = self._download_json(
 185             'https://www.playsuisse.ch/api/graphql',
 186             media_id, data=json.dumps({
 187                 'operationName': 'AssetWatch',
 188                 'query': self._GRAPHQL_QUERY,
 189                 'variables': {'assetId': media_id},
 190             }).encode(),
 191             headers={'Content-Type': 'application/json', 'locale': 'de'})
 192
 193         return response['data']['assetV2']
 194
 195     def _real_extract(self, url):
 196         if not self._ID_TOKEN:
 197             self.raise_login_required(method='password')
 198
 199         media_id = self._match_id(url)
 200         media_data = self._get_media_data(media_id)
 201         info = self._extract_single(media_data)
 202         if media_data.get('episodes'):
 203             info.update({
 204                 '_type': 'playlist',
 205                 'entries': map(self._extract_single, media_data['episodes']),
 206             })
 207         return info
 208
 209     def _extract_single(self, media_data):
 210         thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
 211
 212         formats, subtitles = [], {}
 213         for media in traverse_obj(media_data, 'medias', default=[]):
 214             if not media.get('url') or media.get('type') != 'HLS':
 215                 continue
 216             f, subs = self._extract_m3u8_formats_and_subtitles(
 217                 update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
 218                 media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
 219             formats.extend(f)
 220             self._merge_subtitles(subs, target=subtitles)
 221
 222         return {
 223             'id': media_data['id'],
 224             'title': media_data.get('name'),
 225             'description': media_data.get('description'),
 226             'thumbnails': thumbnails,
 227             'duration': int_or_none(media_data.get('duration')),
 228             'formats': formats,
 229             'subtitles': subtitles,
 230             'series': media_data.get('seriesName'),
 231             'season_number': int_or_none(media_data.get('seasonNumber')),
 232             'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
 233             'episode_number': int_or_none(media_data.get('episodeNumber')),
 234         }