3 from .common
import InfoExtractor
4 from .brightcove
import BrightcoveNewIE
6 from ..compat
import compat_str
25 class ITVIE(InfoExtractor
):
26 _VALID_URL
= r
'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
27 _GEO_COUNTRIES
= ['GB']
29 'url': 'https://www.itv.com/hub/plebs/2a1873a0002',
33 'title': 'Plebs - The Orgy',
34 'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4',
38 'thumbnail': r
're:https?://hubimages\.itv\.com/episode/2_1873_0002'
42 'skip_download': True,
45 'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209',
49 'title': 'The Jonathan Ross Show - Series 17 - Episode 8',
50 'description': 'md5:3023dcdd375db1bc9967186cdb3f1399',
51 'series': 'The Jonathan Ross Show',
54 'thumbnail': r
're:https?://hubimages\.itv\.com/episode/2_1873_0002'
58 'skip_download': True,
61 # unavailable via data-playlist-url
62 'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
63 'only_matching': True,
66 'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
67 'only_matching': True,
70 'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
71 'only_matching': True,
74 def _generate_api_headers(self
, hmac
):
76 'Accept': 'application/vnd.itv.vod.playlist.v2+json',
77 'Content-Type': 'application/json',
79 }, self
.geo_verification_headers())
81 def _call_api(self
, video_id
, playlist_url
, headers
, platform_tag
, featureset
, fatal
=True):
82 return self
._download
_json
(
83 playlist_url
, video_id
, data
=json
.dumps({
90 'manufacturer': 'Safari',
102 'variantAvailability': {
107 'platformTag': platform_tag
109 }).encode(), headers
=headers
, fatal
=fatal
)
111 def _get_subtitles(self
, video_id
, variants
, ios_playlist_url
, headers
, *args
, **kwargs
):
113 # Prefer last matching featureset
114 # See: https://github.com/yt-dlp/yt-dlp/issues/986
115 platform_tag_subs
, featureset_subs
= next(
116 ((platform_tag
, featureset
)
117 for platform_tag
, featuresets
in reversed(list(variants
.items())) for featureset
in featuresets
118 if try_get(featureset
, lambda x
: x
[2]) == 'outband-webvtt'),
121 if platform_tag_subs
and featureset_subs
:
122 subs_playlist
= self
._call
_api
(
123 video_id
, ios_playlist_url
, headers
, platform_tag_subs
, featureset_subs
, fatal
=False)
124 subs
= try_get(subs_playlist
, lambda x
: x
['Playlist']['Video']['Subtitles'], list) or []
126 if not isinstance(sub
, dict):
128 href
= url_or_none(sub
.get('Href'))
131 subtitles
.setdefault('en', []).append({'url': href
})
134 def _real_extract(self
, url
):
135 video_id
= self
._match
_id
(url
)
136 webpage
= self
._download
_webpage
(url
, video_id
)
137 params
= extract_attributes(self
._search
_regex
(
138 r
'(?s)(<[^>]+id="video"[^>]*>)', webpage
, 'params'))
139 variants
= self
._parse
_json
(
140 try_get(params
, lambda x
: x
['data-video-variants'], compat_str
) or '{}',
141 video_id
, fatal
=False)
142 # Prefer last matching featureset
143 # See: https://github.com/yt-dlp/yt-dlp/issues/986
144 platform_tag_video
, featureset_video
= next(
145 ((platform_tag
, featureset
)
146 for platform_tag
, featuresets
in reversed(list(variants
.items())) for featureset
in featuresets
147 if set(try_get(featureset
, lambda x
: x
[:2]) or []) == {'aes', 'hls'}),
149 if not platform_tag_video
or not featureset_video
:
150 raise ExtractorError('No downloads available', expected
=True, video_id
=video_id
)
152 ios_playlist_url
= params
.get('data-video-playlist') or params
['data-video-id']
153 headers
= self
._generate
_api
_headers
(params
['data-video-hmac'])
154 ios_playlist
= self
._call
_api
(
155 video_id
, ios_playlist_url
, headers
, platform_tag_video
, featureset_video
)
157 video_data
= try_get(ios_playlist
, lambda x
: x
['Playlist']['Video'], dict) or {}
158 ios_base_url
= video_data
.get('Base')
160 for media_file
in (video_data
.get('MediaFiles') or []):
161 href
= media_file
.get('Href')
165 href
= ios_base_url
+ href
166 ext
= determine_ext(href
)
168 formats
.extend(self
._extract
_m
3u8_formats
(
169 href
, video_id
, 'mp4', entry_protocol
='m3u8_native',
170 m3u8_id
='hls', fatal
=False))
175 info
= self
._search
_json
_ld
(webpage
, video_id
, default
={})
177 json_ld
= self
._parse
_json
(self
._search
_regex
(
178 JSON_LD_RE
, webpage
, 'JSON-LD', '{}',
179 group
='json_ld'), video_id
, fatal
=False)
180 if json_ld
and json_ld
.get('@type') == 'BreadcrumbList':
181 for ile
in (json_ld
.get('itemListElement:') or []):
182 item
= ile
.get('item:') or {}
183 if item
.get('@type') == 'TVEpisode':
184 item
['@context'] = 'http://schema.org'
185 info
= self
._json
_ld
(item
, video_id
, fatal
=False) or {}
189 thumbnail_url
= try_get(params
, lambda x
: x
['data-video-posterframe'], compat_str
)
192 'url': thumbnail_url
.format(width
=1920, height
=1080, quality
=100, blur
=0, bg
='false'),
196 'url': urljoin(base_url(thumbnail_url
), url_basename(thumbnail_url
)),
200 thumbnail_url
= self
._html
_search
_meta
(['og:image', 'twitter:image'], webpage
, default
=None)
203 'url': thumbnail_url
,
205 self
._remove
_duplicate
_formats
(thumbnails
)
209 'title': self
._html
_search
_meta
(['og:title', 'twitter:title'], webpage
),
211 'subtitles': self
.extract_subtitles(video_id
, variants
, ios_playlist_url
, headers
),
212 'duration': parse_duration(video_data
.get('Duration')),
213 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage
)),
214 'thumbnails': thumbnails
218 class ITVBTCCIE(InfoExtractor
):
219 _VALID_URL
= r
'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
221 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
223 'id': 'btcc-2019-brands-hatch-gp-race-action',
224 'title': 'BTCC 2019: Brands Hatch GP race action',
226 'playlist_count': 12,
228 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
230 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
231 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32'
235 BRIGHTCOVE_URL_TEMPLATE
= 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
237 def _real_extract(self
, url
):
238 playlist_id
= self
._match
_id
(url
)
240 webpage
= self
._download
_webpage
(url
, playlist_id
)
243 self
._search
_nextjs
_data
(webpage
, playlist_id
),
244 lambda x
: x
['props']['pageProps']['article']['body']['content']) or []
247 for video
in json_map
:
248 if not any(video
['data'].get(attr
) == 'Brightcove' for attr
in ('name', 'type')):
250 video_id
= video
['data']['id']
251 account_id
= video
['data']['accountId']
252 player_id
= video
['data']['playerId']
253 entries
.append(self
.url_result(
254 smuggle_url(self
.BRIGHTCOVE_URL_TEMPLATE
% (account_id
, player_id
, video_id
), {
255 # ITV does not like some GB IP ranges, so here are some
256 # IP blocks it accepts
258 '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
262 ie
=BrightcoveNewIE
.ie_key(), video_id
=video_id
))
264 title
= self
._og
_search
_title
(webpage
, fatal
=False)
266 return self
.playlist_result(entries
, playlist_id
, title
)