[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / paramountplus.py
blob317f53b2bcb82f02e4d7f52daa7e450ee11a7c5c
1 import itertools
3 from .cbs import CBSBaseIE
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 int_or_none,
8 url_or_none,
class ParamountPlusIE(CBSBaseIE):
    """Extractor for individual Paramount+ show and movie videos.

    Matches ``paramountplus:<id>`` pseudo-URLs as well as
    ``paramountplus.com/shows/...`` and ``paramountplus.com/movies/...``
    video pages; the content ID is captured by the ``id`` group.
    """

    _VALID_URL = r'''(?x)
        (?:
            paramountplus:|
            https?://(?:www\.)?(?:
                paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/
        )(?P<id>[\w-]+))'''

    # All tests are blocked outside US
    _TESTS = [{
        'url': 'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/',
        'info_dict': {
            'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
            'ext': 'mp4',
            'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
            'description': 'md5:7ac835000645a69933df226940e3c859',
            'duration': 1426,
            'timestamp': 920264400,
            'upload_date': '19990301',
            'uploader': 'CBSI-NEW',
            'episode_number': 5,
            'thumbnail': r're:https?://.+\.jpg$',
            'season': 'Season 2',
            'chapters': 'count:3',
            'episode': 'Episode 5',
            'season_number': 2,
            'series': 'CatDog',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/',
        'info_dict': {
            'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
            'ext': 'mp4',
            'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
            'description': 'md5:f4adcea3e8b106192022e121f1565bae',
            'duration': 2506,
            'timestamp': 1627063200,
            'upload_date': '20210723',
            'uploader': 'CBSI-NEW',
            'episode_number': 81,
            'thumbnail': r're:https?://.+\.jpg$',
            'season': 'Season 2',
            'chapters': 'count:4',
            'episode': 'Episode 81',
            'season_number': 2,
            'series': 'Tooning Out The News',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/',
        'info_dict': {
            'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
            'ext': 'mp4',
            'title': 'Daddy\'s Home',
            'upload_date': '20151225',
            'description': 'md5:9a6300c504d5e12000e8707f20c54745',
            'uploader': 'CBSI-NEW',
            'timestamp': 1451030400,
            'thumbnail': r're:https?://.+\.jpg$',
            'chapters': 'count:0',
            'duration': 5761,
            'series': 'Paramount+ Movies',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'DRM',
    }, {
        'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/',
        'info_dict': {
            'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
            'ext': 'mp4',
            'uploader': 'CBSI-NEW',
            'description': 'md5:bc7b6fea84ba631ef77a9bda9f2ff911',
            'timestamp': 1577865600,
            'title': 'Sonic the Hedgehog',
            'upload_date': '20200101',
            'thumbnail': r're:https?://.+\.jpg$',
            'chapters': 'count:0',
            'duration': 5932,
            'series': 'Paramount+ Movies',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'DRM',
    }, {
        'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/',
        'only_matching': True,
    }, {
        'url': 'https://www.paramountplus.com/shows/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/',
        'only_matching': True,
    }, {
        'url': 'https://www.paramountplus.com/movies/video/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
        'only_matching': True,
    }, {
        'url': 'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
        'only_matching': True,
    }]

    def _extract_video_info(self, content_id, mpx_acc=2198311517):
        """Fetch item metadata for ``content_id`` and extract its formats.

        The item list is downloaded from the Paramount+ apps API; one
        format query is built per listed asset type and the last item
        supplies title/series/episode metadata. The actual extraction is
        delegated to ``self._extract_common_video_info`` (not defined in
        this class; presumably inherited from ``CBSBaseIE``), to which
        ``mpx_acc`` is passed through unchanged.

        If no non-DRM formats are found, the extraction is repeated with
        DRM-capable formats allowed, purely to decide whether to report
        the video as DRM-protected or to surface the original error.

        Raises:
            ExtractorError: if the API response lists no items, or if
                extraction failed and no DRM formats explain the failure.
        """
        items_data = self._download_json(
            f'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/{content_id}.json',
            content_id, query={
                'locale': 'en-us',
                'at': 'ABCXgPuoStiPipsK0OHVXIVh68zNys+G4f7nW9R6qH68GDOcneW6Kg89cJXGfiQCsj0=',
            }, headers=self.geo_verification_headers())

        # Guard against a missing/empty item list so the failure is a clean
        # ExtractorError rather than a bare KeyError/IndexError below.
        items = items_data.get('itemList') or []
        if not items:
            raise ExtractorError('No video items found in API response', video_id=content_id)

        asset_types = {
            item.get('assetType'): {
                'format': 'SMIL',
                'formats': 'M3U+none,MPEG4',  # '+none' specifies ProtectionScheme (no DRM)
            } for item in items
        }
        # Metadata is taken from the last listed item.
        item = items[-1]

        info, error = {}, None
        metadata = {
            'title': item.get('title'),
            'series': item.get('seriesTitle'),
            'season_number': int_or_none(item.get('seasonNum')),
            'episode_number': int_or_none(item.get('episodeNum')),
            'duration': int_or_none(item.get('duration')),
            'thumbnail': url_or_none(item.get('thumbnail')),
        }
        try:
            info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
        except ExtractorError as e:
            # Defer the error: a DRM probe below may give a better explanation.
            error = e

        # Check for DRM formats to give appropriate error
        if not info.get('formats'):
            for query in asset_types.values():
                query['formats'] = 'MPEG-DASH,M3U,MPEG4'  # allows DRM formats

            try:
                drm_info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
            except ExtractorError:
                # Prefer the first (non-DRM) failure over the probe's failure.
                if error:
                    raise error from None
                raise
            if drm_info['formats']:
                self.report_drm(content_id)
            elif error:
                raise error

        return info
class ParamountPlusSeriesIE(InfoExtractor):
    """Playlist extractor for Paramount+ show pages (``/shows/<name>``)."""

    _VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P<id>[a-zA-Z0-9-_]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://www.paramountplus.com/shows/drake-josh',
        'playlist_mincount': 50,
        'info_dict': {
            'id': 'drake-josh',
        },
    }, {
        'url': 'https://www.paramountplus.com/shows/hawaii_five_0/',
        'playlist_mincount': 240,
        'info_dict': {
            'id': 'hawaii_five_0',
        },
    }, {
        'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/',
        'playlist_mincount': 248,
        'info_dict': {
            'id': 'spongebob-squarepants',
        },
    }]

    def _entries(self, show_name):
        """Yield a URL result for every episode listed for ``show_name``.

        Episode pages are requested one after another, starting at page 0,
        until a response does not report ``success``.
        """
        for page_num in itertools.count():
            listing_url = (
                f'https://www.paramountplus.com/shows/{show_name}'
                f'/xhr/episodes/page/{page_num}/size/50/xs/0/season/0')
            page_data = self._download_json(listing_url, show_name)
            if not page_data.get('success'):
                # No further pages; nothing follows the loop, so this ends the generator.
                break
            for ep in page_data['result']['data']:
                episode_url = f'https://www.paramountplus.com{ep["url"]}'
                yield self.url_result(
                    episode_url, ie=ParamountPlusIE.ie_key(), video_id=ep['content_id'])

    def _real_extract(self, url):
        """Return a playlist of the show's episodes, keyed by the show name in ``url``."""
        series_id = self._match_id(url)
        return self.playlist_result(self._entries(series_id), playlist_id=series_id)