import json

from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
class ITVIE(InfoExtractor):
    """Extract a single episode from an ITV Hub page (see ``_VALID_URL``)."""

    # NOTE(review): this class was rebuilt from a damaged copy of the file in
    # which original line numbers were fused into the code, tokens were split
    # across lines and a number of lines were dropped altogether.  Everything
    # tagged RECONSTRUCTED below was absent from that copy and has been
    # inferred from the surrounding code; verify those lines against the
    # canonical file before merging.

    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
    _GEO_COUNTRIES = ['GB']
    # RECONSTRUCTED: the attribute name and the 'info_dict' / 'params'
    # wrappers.  Further expected fields of the first two cases were dropped
    # from the damaged copy and are not guessed here.
    _TESTS = [{
        'url': 'https://www.itv.com/hub/plebs/2a1873a0002',
        'info_dict': {
            'title': 'Plebs - The Orgy',
            'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4',
            'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209',
        'info_dict': {
            'title': 'The Jonathan Ross Show - Series 17 - Episode 8',
            'description': 'md5:3023dcdd375db1bc9967186cdb3f1399',
            'series': 'The Jonathan Ross Show',
            # NOTE(review): this pattern carries the episode id of the first
            # test case (2_1873_0002), not of this URL -- looks like a
            # copy/paste leftover; confirm against a live page.
            'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # unavailable via data-playlist-url
        'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
        'only_matching': True,
    }, {
        'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
        'only_matching': True,
    }, {
        'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
        'only_matching': True,
    }]

    @staticmethod
    def _pick_featureset(variants, predicate):
        """Select the ``(platform_tag, featureset)`` pair to request.

        Platform tags are walked in reverse insertion order; within a tag the
        featuresets are tested in their listed order and the first one for
        which ``predicate(featureset)`` is true wins.

        Returns ``(None, None)`` when nothing matches or *variants* is empty
        or ``None``.
        """
        # Prefer last matching featureset
        # See: https://github.com/yt-dlp/yt-dlp/issues/986
        for platform_tag, featuresets in reversed(list((variants or {}).items())):
            for featureset in featuresets or []:
                if predicate(featureset):
                    return platform_tag, featureset
        return None, None

    def _generate_api_headers(self, hmac):
        """Return the HTTP headers sent with a playlist API request.

        :param hmac: value of the page's ``data-video-hmac`` attribute.
        """
        # NOTE(review): RECONSTRUCTED -- the damaged copy only showed the two
        # literal headers followed by ``}, self.geo_verification_headers())``.
        # The call that combined them and the line consuming ``hmac`` were
        # missing; confirm the header name/casing and the merge precedence.
        headers = dict(self.geo_verification_headers())
        headers.update({
            'Accept': 'application/vnd.itv.vod.playlist.v2+json',
            'Content-Type': 'application/json',
            'hmac': hmac.upper(),
        })
        return headers

    def _call_api(self, video_id, playlist_url, headers, platform_tag, featureset, fatal=True):
        """POST a playlist request and return the decoded JSON response.

        :param video_id: id passed through to ``_download_json``.
        :param playlist_url: endpoint to POST to.
        :param headers: request headers (see ``_generate_api_headers``).
        :param platform_tag: sent as ``variantAvailability.platformTag``.
        :param featureset: featureset selected for this request.
        :param fatal: forwarded to ``_download_json``.
        """
        # NOTE(review): only 'manufacturer', 'variantAvailability' and
        # 'platformTag' survived in the damaged copy.  Every other key and
        # value of this payload is RECONSTRUCTED and must be confirmed.
        payload = {
            'user': {
                'itvUserId': '',
                'entitlements': [],
                'token': '',
            },
            'device': {
                'manufacturer': 'Safari',
                'model': '5',
                'os': {
                    'name': 'Windows NT',
                    'version': '6.1',
                    'type': 'desktop',
                },
            },
            'client': {
                'version': '4.1',
                'id': 'browser',
            },
            'variantAvailability': {
                'featureset': {
                    'min': featureset,
                    'max': featureset,
                },
                'platformTag': platform_tag,
            },
        }
        return self._download_json(
            playlist_url, video_id, data=json.dumps(payload).encode(),
            headers=headers, fatal=fatal)

    def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs):
        """Collect subtitle tracks, all filed under the ``'en'`` key.

        A second, non-fatal playlist request is made with the featureset
        whose third element is ``'outband-webvtt'``; every entry of its
        ``Playlist.Video.Subtitles`` list with a usable ``Href`` becomes one
        subtitle URL.  Returns an empty dict when no such featureset exists.
        """
        subtitles = {}  # RECONSTRUCTED
        platform_tag_subs, featureset_subs = self._pick_featureset(
            variants, lambda fs: try_get(fs, lambda x: x[2]) == 'outband-webvtt')

        if platform_tag_subs and featureset_subs:
            subs_playlist = self._call_api(
                video_id, ios_playlist_url, headers, platform_tag_subs, featureset_subs, fatal=False)
            subs = try_get(subs_playlist, lambda x: x['Playlist']['Video']['Subtitles'], list) or []
            for sub in subs:  # RECONSTRUCTED
                if not isinstance(sub, dict):
                    continue  # RECONSTRUCTED
                href = url_or_none(sub.get('Href'))
                if not href:  # RECONSTRUCTED
                    continue
                subtitles.setdefault('en', []).append({'url': href})
        return subtitles  # RECONSTRUCTED

    def _real_extract(self, url):
        """Extract formats, subtitles, thumbnails and metadata for *url*.

        Raises ``ExtractorError`` (expected) when no featureset whose first
        two elements are ``'aes'`` and ``'hls'`` is advertised by the page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # All per-video parameters are read from the attributes of the
        # element with id="video".
        params = extract_attributes(self._search_regex(
            r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
        # The parse is non-fatal; ``or {}`` keeps the code below from calling
        # ``.items()`` on a failed (presumably ``None``) result.
        variants = self._parse_json(
            try_get(params, lambda x: x['data-video-variants'], str) or '{}',
            video_id, fatal=False) or {}
        platform_tag_video, featureset_video = self._pick_featureset(
            variants, lambda fs: set(try_get(fs, lambda x: x[:2]) or []) == {'aes', 'hls'})
        if not platform_tag_video or not featureset_video:
            raise ExtractorError('No downloads available', expected=True, video_id=video_id)

        ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
        headers = self._generate_api_headers(params['data-video-hmac'])
        ios_playlist = self._call_api(
            video_id, ios_playlist_url, headers, platform_tag_video, featureset_video)

        video_data = try_get(ios_playlist, lambda x: x['Playlist']['Video'], dict) or {}
        ios_base_url = video_data.get('Base')
        formats = []  # RECONSTRUCTED
        for media_file in (video_data.get('MediaFiles') or []):
            href = media_file.get('Href')
            if not href:  # RECONSTRUCTED
                continue
            if ios_base_url:  # RECONSTRUCTED
                href = ios_base_url + href
            ext = determine_ext(href)
            if ext == 'm3u8':  # RECONSTRUCTED
                formats.extend(self._extract_m3u8_formats(
                    href, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:  # RECONSTRUCTED branch
                formats.append({
                    'url': href,
                })

        info = self._search_json_ld(webpage, video_id, default={})
        if not info:  # RECONSTRUCTED
            # Fall back to a TVEpisode item nested inside a BreadcrumbList.
            json_ld = self._parse_json(self._search_regex(
                JSON_LD_RE, webpage, 'JSON-LD', '{}',
                group='json_ld'), video_id, fatal=False)
            if json_ld and json_ld.get('@type') == 'BreadcrumbList':
                # NOTE(review): both keys below end in a colon in the source
                # as found; kept byte-for-byte -- confirm this is intentional.
                for ile in (json_ld.get('itemListElement:') or []):
                    item = ile.get('item:') or {}
                    if item.get('@type') == 'TVEpisode':
                        item['@context'] = 'http://schema.org'
                        info = self._json_ld(item, video_id, fatal=False) or {}
                        break  # RECONSTRUCTED

        thumbnails = []  # RECONSTRUCTED
        thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], str)
        if thumbnail_url:  # RECONSTRUCTED
            thumbnails.extend([{
                # The posterframe attribute is used as a str.format template.
                'url': thumbnail_url.format(width=1920, height=1080, quality=100, blur=0, bg='false'),
                'width': 1920,  # RECONSTRUCTED
                'height': 1080,  # RECONSTRUCTED
            }, {
                # Same template reduced to its base URL plus basename.
                'url': urljoin(base_url(thumbnail_url), url_basename(thumbnail_url)),
                # NOTE(review): one more entry of this dict was dropped from
                # the damaged copy and is not guessed here.
            }])

        thumbnail_url = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
        if thumbnail_url:  # RECONSTRUCTED
            thumbnails.append({
                'url': thumbnail_url,
            })
        self._remove_duplicate_formats(thumbnails)

        result = {
            'id': video_id,  # RECONSTRUCTED
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'formats': formats,  # RECONSTRUCTED
            'subtitles': self.extract_subtitles(video_id, variants, ios_playlist_url, headers),
            'duration': parse_duration(video_data.get('Duration')),
            'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
            'thumbnails': thumbnails,
        }
        # NOTE(review): RECONSTRUCTED -- the statement that returned the
        # result and folded ``info`` into it was missing.  Here JSON-LD values
        # only fill fields the page-derived data left unset; confirm the
        # intended precedence.
        for key, value in info.items():
            if result.get(key) is None:
                result[key] = value
        return result
class ITVBTCCIE(InfoExtractor):
    """Extract the Brightcove videos embedded in an ITV news/BTCC article."""

    # NOTE(review): this class was rebuilt from a damaged copy of the file in
    # which original line numbers were fused into the code, tokens were split
    # across lines and some lines were dropped.  Everything tagged
    # RECONSTRUCTED below was absent from that copy and has been inferred from
    # the surrounding code; verify it against the canonical file.

    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    # RECONSTRUCTED: the attribute name and the 'info_dict' wrappers.
    _TESTS = [{
        'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
        'info_dict': {
            'id': 'btcc-2019-brands-hatch-gp-race-action',
            'title': 'BTCC 2019: Brands Hatch GP race action',
        },
        'playlist_count': 12,
    }, {
        'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
        'info_dict': {
            'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
            'title': 'md5:6ef054dd9f069330db3dcc66cb772d32',
        },
        # NOTE(review): one line of this case (presumably its expected
        # playlist size) was dropped from the damaged copy; not guessed here.
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a playlist with one Brightcove entry per embedded video.

        The article body is read from the page's Next.js data at
        ``props.pageProps.article.body.content``; nodes whose ``data`` does
        not name Brightcove in ``name`` or ``type`` are ignored.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        content = try_get(  # RECONSTRUCTED: assignment and try_get( opener
            self._search_nextjs_data(webpage, playlist_id),
            lambda x: x['props']['pageProps']['article']['body']['content']) or []

        entries = []  # RECONSTRUCTED
        for video in content:
            # Skip nodes that carry no dict under 'data' instead of raising
            # on the lookups below.
            data = try_get(video, lambda x: x['data'], dict)
            if not data:
                continue
            if not any(data.get(attr) == 'Brightcove' for attr in ('name', 'type')):
                continue  # RECONSTRUCTED
            video_id = data['id']
            account_id = data['accountId']
            player_id = data['playerId']
            entries.append(self.url_result(
                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), {
                    # ITV does not like some GB IP ranges, so here are some
                    # IP blocks it accepts
                    # NOTE(review): the key name below is RECONSTRUCTED.
                    'geo_ip_blocks': [
                        '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21',
                    ],
                    # NOTE(review): RECONSTRUCTED -- one line was dropped here
                    # in the damaged copy; confirm it was the referrer.
                    'referrer': url,
                }),
                ie=BrightcoveNewIE.ie_key(), video_id=video_id))

        title = self._og_search_title(webpage, fatal=False)

        return self.playlist_result(entries, playlist_id, title)