4 from .common
import InfoExtractor
# Extractor class for pages on *.iprima.cz hosts; the (?!cnn) lookahead in
# _VALID_URL rejects host names that start with "cnn".
# NOTE(review): the leading integers on these lines look like original line
# numbers fused into the text, and gaps in that numbering (e.g. 17, 20-22,
# 24-26) suggest that statements are missing from this view -- confirm
# against the complete file before editing.
15 class IPrimaIE(InfoExtractor
):
# The named group "id" captures the last path segment of the URL.
16 _VALID_URL
= r
'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
18 _NETRC_MACHINE
= 'iprima'
# Base URL from which the login, profile-select and token endpoints used in
# _perform_login are built.
19 _AUTH_ROOT
= 'https://auth.iprima.cz'
# Test-case fragments: the first two entries carry expected metadata fields,
# the remaining URLs are flagged 'only_matching'.
# NOTE(review): the enclosing list/dict delimiters are not visible here.
23 'url': 'https://prima.iprima.cz/particka/92-epizoda',
27 'title': 'Partička (92)',
28 'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
29 'episode': 'Partička (92)',
31 'series': 'Prima Partička',
33 'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
36 'skip_download': True, # m3u8 download
39 'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
44 'episode': 'Tenerife: V říši ohně',
45 'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
47 'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
48 'title': 'Tenerife: V říši ohně',
49 'timestamp': 1711825800,
50 'upload_date': '20240330',
53 'skip_download': True, # m3u8 download
56 'url': 'http://play.iprima.cz/particka/particka-92',
57 'only_matching': True,
60 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
61 'only_matching': True,
63 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
64 'only_matching': True,
66 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
67 'only_matching': True,
69 'url': 'http://www.iprima.cz/filmy/desne-rande',
70 'only_matching': True,
72 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
73 'only_matching': True,
75 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
76 'only_matching': True,
78 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
79 'only_matching': True,
81 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
82 'only_matching': True,
# Log in with the given credentials against the endpoints under _AUTH_ROOT
# and store the obtained token on self.access_token.
# Visible failure paths raise ExtractorError('Login failed') and
# ExtractorError('Getting token failed'), both with expected=True.
# NOTE(review): leading integers are fused original line numbers; the gaps
# (86-88, 94-96, 101-102, 113, 120, ...) mean some statements are not shown.
85 def _perform_login(self
, username
, password
):
# Fetch the login page first; its hidden inputs seed the form posted below.
89 login_page
= self
._download
_webpage
(
90 f
'{self._AUTH_ROOT}/oauth2/login', None, note
='Downloading login page',
91 errnote
='Downloading login page failed')
93 login_form
= self
._hidden
_inputs
(login_page
)
# NOTE(review): only the password entry of what appears to be a dict literal
# is visible; the opener and any entry using `username` (original lines
# 94-96) are missing from this view.
97 '_password': password
})
# Post the form to the same login URL; both the response body and the
# response handle are kept because the handle's final URL is inspected below.
99 profile_select_html
, login_handle
= self
._download
_webpage
_handle
(
100 f
'{self._AUTH_ROOT}/oauth2/login', None, data
=urlencode_postdata(login_form
),
# NOTE(review): the rest of the call above (original lines 101-102) is not
# visible here.
103 # a profile may need to be selected first, even when there is only a single one
104 if '/profile-select' in login_handle
.url
:
105 profile_id
= self
._search
_regex
(
106 r
'data-identifier\s*=\s*["\']?
(\w
+)', profile_select_html, 'profile
id')
# Selecting the profile replaces login_handle with the handle of that request.
108 login_handle = self._request_webpage(
109 f'{self
._AUTH
_ROOT
}/user
/profile
-select
-perform
/{profile_id}
', None,
110 query={'continueUrl
': '/user
/login?redirect_uri
=/user
/'}, note='Selecting profile
')
# Take the first value of the `code` query parameter from the final URL.
112 code = traverse_obj(login_handle.url, ({parse_qs}, 'code
', 0))
# NOTE(review): original line 113 is missing; presumably this raise is
# guarded by a check that `code` is empty -- confirm against the full file.
114 raise ExtractorError('Login failed
', expected=True)
# Form fields for the token request.
# NOTE(review): original line 120 is missing between 'grant_type' and
# 'redirect_uri'; presumably it passes `code` -- confirm.
116 token_request_data = {
117 'scope
': 'openid
+email
+profile
+phone
+address
+offline_access
',
118 'client_id
': 'prima_sso
',
119 'grant_type
': 'authorization_code
',
121 'redirect_uri
': f'{self
._AUTH
_ROOT
}/sso
/auth
-check
'}
# POST the url-encoded fields to the token endpoint and parse the JSON reply.
123 token_data = self._download_json(
124 f'{self
._AUTH
_ROOT
}/oauth2
/token
', None,
125 note='Downloading token
', errnote='Downloading token failed
',
126 data=urlencode_postdata(token_request_data))
# Keep the token on the instance; a reply without 'access_token' is reported
# as an expected error.
128 self.access_token = token_data.get('access_token
')
129 if self.access_token is None:
130 raise ExtractorError('Getting token failed
', expected=True)
132 def _real_initialize(self):
133 if not self.access_token:
134 self.raise_login_required('Login
is required to access any iPrima content
', method='password
')
136 def _raise_access_error(self, error_code):
137 if error_code == 'PLAY_GEOIP_DENIED
':
138 self.raise_geo_restricted(countries=['CZ
'], metadata_available=True)
139 elif error_code is not None:
140 self.raise_no_formats('Access to stream infos forbidden
', expected=True)
# Extract a video from an iPrima page: download the page, work out the
# product id, query the play backend for manifests and collect HLS/DASH
# formats plus page metadata.
# NOTE(review): leading integers are fused original line numbers; gaps
# (156-157, 161-162, 166, 168-169, 176-177, 184, 199, 204, 208-210, ...)
# mean that guards, initialisations and the return are not visible here.
142 def _real_extract(self, url):
143 video_id = self._match_id(url)
145 webpage = self._download_webpage(url, video_id)
# Title: the HTML <title> first, falling back to og:title / twitter:title.
147 title = self._html_extract_title(webpage) or self._html_search_meta(
148 ['og
:title
', 'twitter
:title
'],
149 webpage, 'title
', default=None)
# Look for the real product id ("p" followed by digits) in the page source;
# default=None means a miss yields None instead of an error.
151 video_id = self._search_regex((
152 r'productId\s
*=\s
*([\'"])(?P<id>p\d+)\1',
153 r'pproduct_id\s*=\s*([\'"])(?P
<id>p\d
+)\
1',
154 r'let\s
+videos\s
*=\s
*([\'"])(?P<id>p\d+)\1',
155 ), webpage, 'real id', group='id', default=None)
# Second lookup: the 'videoPlayId' string inside the page's Nuxt data.
# NOTE(review): presumably each lookup from here on runs only when the
# previous one found nothing (the guarding lines are missing) -- confirm.
158 nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
159 video_id = traverse_obj(
160 nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
# Third lookup: parse the JSON array in the __NUXT_DATA__ script tag and take
# the first value that fully matches p\d+.
163 nuxt_data = self._search_json(
164 r'<script[^>]+\bid=["\']__NUXT_DATA__
["\'][^>]*>',
165 webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
167 video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
170 self.raise_no_formats('Unable to extract video ID from webpage')
# Ask the play backend for manifest URLs, sending the login token in the
# X-OTT-Access-Token header.
# NOTE(review): the URL contains a double slash after "v1" -- confirm that
# this is intentional. The end of this call (lines 176-177) is not visible.
172 metadata = self._download_json(
173 f'https://api.play-backend.iprima.cz/api/v1//products/id-{video_id}/play',
174 video_id, note='Getting manifest URLs', errnote='Failed to get manifest URLs',
175 headers={'X-OTT-Access-Token': self.access_token},
# Report any errorCode in the reply through _raise_access_error.
178 self._raise_access_error(metadata.get('errorCode'))
180 stream_infos = metadata.get('streamInfos')
182 if stream_infos is None:
183 self.raise_no_formats('Reading stream infos failed', expected=True)
# Each manifest is handled as HLS or DASH according to its 'type' field or,
# failing that, the extension of its URL; other manifests are ignored.
# NOTE(review): the initialisation of `formats` (line 184) is not visible.
185 for manifest in stream_infos:
186 manifest_type = manifest.get('type')
187 manifest_url = manifest.get('url')
188 ext = determine_ext(manifest_url)
189 if manifest_type == 'HLS' or ext == 'm3u8':
190 formats += self._extract_m3u8_formats(
191 manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
192 m3u8_id='hls', fatal=False)
193 elif manifest_type == 'DASH' or ext == 'mpd':
194 formats += self._extract_mpd_formats(
195 manifest_url, video_id, mpd_id='dash', fatal=False)
# Start from the page's JSON-LD metadata (empty dict when absent) and overlay
# title, thumbnail and description; the JSON-LD title wins over the page title.
197 final_result = self._search_json_ld(webpage, video_id, default={})
198 final_result.update({
200 'title': final_result.get('title') or title,
201 'thumbnail': self._html_search_meta(
202 ['thumbnail', 'og:image', 'twitter:image'],
203 webpage, 'thumbnail', default=None),
205 'description': self._html_search_meta(
206 ['description', 'og:description', 'twitter:description'],
207 webpage, 'description', default=None)})
# Extractor class for pages on cnn.iprima.cz (see _VALID_URL).
# NOTE(review): leading integers are fused original line numbers; gaps in the
# numbering mean some statements are missing from this view, and the final
# result dict is not closed within the visible lines.
212 class IPrimaCNNIE(InfoExtractor):
213 _VALID_URL = r'https?://cnn\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
# Test-case fragment; the enclosing list/dict delimiters are not visible.
217 'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
221 'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
224 'skip_download': 'm3u8',
# Download the article page, locate the product id, fetch the player page and
# collect formats from the player options (or from raw "src" entries).
228 def _real_extract(self, url):
229 video_id = self._match_id(url)
# Set the ott_adult_confirmed cookie for play.iprima.cz before any request.
231 self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
233 webpage = self._download_webpage(url, video_id)
# Title: Open Graph title first, otherwise the text of the first <h1>.
235 title = self._og_search_title(
236 webpage, default=None) or self._search_regex(
237 r'<h1>([^<]+)', webpage, 'title')
# Replace the URL slug with the product id found in the page; several
# alternative patterns are tried.
# NOTE(review): the end of this call (lines 245-246) is not visible.
239 video_id = self._search_regex(
240 (r'<iframe[^>]+\bsrc=["\'](?
:https?
:)?
//(?
:api\
.play
-backend\
.iprima\
.cz
/prehravac
/embedded|prima\
.iprima\
.cz
/[^
/]+/[^
/]+)\?.*?
\bid
=(p\d
+)',
241 r'data
-product
="([^"]+)">',
242 r'id=["\']player
-(p\d
+)"',
243 r'playerId\s*:\s*["\']player
-(p\d
+)',
244 r'\bvideos\s
*=\s
*["\'](p\d+)'),
# Fetch the player init page for this product; the query carries the current
# time rounded to seconds and the original URL is sent as Referer.
247 playerpage = self._download_webpage(
248 'http://play.iprima.cz/prehravac/init',
249 video_id, note='Downloading player', query={
251 '_ts': round(time.time()),
252 'productId': video_id,
253 }, headers={'Referer': url})
# Local helper: turn one manifest URL into formats (HLS or DASH, chosen by
# format_key or by URL extension) and append them to `formats`.
# NOTE(review): line 271 is missing; presumably it fills in the language from
# `lang` when a format has none -- confirm. The initialisation of `formats`
# is not visible either.
257 def extract_formats(format_url, format_key=None, lang=None):
258 ext = determine_ext(format_url)
260 if format_key == 'hls' or ext == 'm3u8':
261 new_formats = self._extract_m3u8_formats(
262 format_url, video_id, 'mp4', entry_protocol='m3u8_native',
263 m3u8_id='hls', fatal=False)
264 elif format_key == 'dash' or ext == 'mpd':
266 new_formats = self._extract_mpd_formats(
267 format_url, video_id, mpd_id='dash', fatal=False)
269 for f in new_formats:
270 if not f.get('language'):
272 formats.extend(new_formats)
# Parse the JavaScript player options object (converted with js_to_json); a
# missing object falls back to '{}' and parse errors are non-fatal.
274 options = self._parse_json(
276 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
277 playerpage, 'player options', default='{}'),
278 video_id, transform_source=js_to_json, fatal=False)
# Walk the per-key track lists; the lower-cased key is passed as format_key.
280 for key, tracks in options.get('tracks', {}).items():
281 if not isinstance(tracks, list):
284 src = track.get('src')
286 extract_formats(src, key.lower(), track.get('lang'))
# Scan the raw player page for quoted "src" values.
# NOTE(review): presumably a fallback for when no formats were found above;
# the guarding line is not visible -- confirm.
289 for _, src in re.findall(r'src["\']\s
*:\s
*(["\'])(.+?)\1', playerpage):
# No formats plus the GEO_IP_NOT_ALLOWED marker in the page is reported as a
# geo restriction for CZ.
292 if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
293 self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
298 'thumbnail': self._og_search_thumbnail(webpage, default=None),
300 'description': self._og_search_description(webpage, default=None),