[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / tvplay.py
blobb4a927a1d354ee39004b4ea77d11f7a87565702e
1 import re
2 import urllib.parse
4 from .common import InfoExtractor
5 from ..networking.exceptions import HTTPError
6 from ..utils import (
7 ExtractorError,
8 determine_ext,
9 int_or_none,
10 parse_iso8601,
11 qualities,
12 traverse_obj,
13 try_get,
14 update_url_query,
15 url_or_none,
16 urljoin,
class TVPlayIE(InfoExtractor):
    """Extractor for videos served through the playapi.mtgx.tv v3 API.

    Handles ``mtg:<id>`` pseudo-URLs as well as the tvplay.lv,
    tv3play.lt / play.tv3.lt and tv3play.ee page URLs matched by
    ``_VALID_URL``.
    """

    IE_NAME = 'mtg'
    IE_DESC = 'MTG services'
    _VALID_URL = r'''(?x)
                    (?:
                        mtg:|
                        https?://
                            (?:www\.)?
                            (?:
                                tvplay(?:\.skaties)?\.lv(?:/parraides)?|
                                (?:tv3play|play\.tv3)\.lt(?:/programos)?|
                                tv3play(?:\.tv3)?\.ee/sisu
                            )
                            /(?:[^/]+/)+
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [
        {
            'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
            'md5': 'a1612fe0849455423ad8718fe049be21',
            'info_dict': {
                'id': '418113',
                'ext': 'mp4',
                'title': 'Kādi ir īri? - Viņas melo labāk',
                'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
                'series': 'Viņas melo labāk',
                'season': '2.sezona',
                'season_number': 2,
                'duration': 25,
                'timestamp': 1406097056,
                'upload_date': '20140723',
            },
        },
        {
            'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
            'info_dict': {
                'id': '409229',
                'ext': 'flv',
                'title': 'Moterys meluoja geriau',
                'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
                'series': 'Moterys meluoja geriau',
                'episode_number': 47,
                'season': '1 sezonas',
                'season_number': 1,
                'duration': 1330,
                'timestamp': 1403769181,
                'upload_date': '20140626',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
            'info_dict': {
                'id': '238551',
                'ext': 'flv',
                'title': 'Kodu keset linna 398537',
                'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
                'duration': 1257,
                'timestamp': 1292449761,
                'upload_date': '20101215',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
            'only_matching': True,
        },
        {
            'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
            'only_matching': True,
        },
        {
            # views is null
            'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
            'only_matching': True,
        },
        {
            'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
            'only_matching': True,
        },
        {
            'url': 'mtg:418113',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Fetch video metadata and streams from the API and build the info dict.

        Raises ExtractorError (expected) with the API-provided message when
        the streams endpoint answers HTTP 403; any other download error is
        re-raised unchanged.
        """
        video_id = self._match_id(url)

        # A two-letter label following a dot in the URL host is used as the
        # geo-bypass country; `mtg:` URLs have no host, so nothing is set up.
        geo_country = self._search_regex(
            r'https?://[^/]+\.([a-z]{2})', url,
            'geo country', default=None)
        if geo_country:
            self._initialize_geo_bypass({'countries': [geo_country.upper()]})

        video = self._download_json(
            f'http://playapi.mtgx.tv/v3/videos/{video_id}', video_id, 'Downloading video JSON')

        # Mandatory field: a missing title is allowed to fail the extraction
        title = video['title']

        try:
            streams = self._download_json(
                f'http://playapi.mtgx.tv/v3/videos/stream/{video_id}',
                video_id, 'Downloading streams JSON')
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                # The 403 body is JSON whose 'msg' field is shown to the user
                msg = self._parse_json(e.cause.response.read().decode('utf-8'), video_id)
                raise ExtractorError(msg['msg'], expected=True)
            raise

        quality = qualities(['hls', 'medium', 'high'])
        formats = []
        # `or {}` rather than a .get() default: the default is not applied
        # when the key is present with an explicit null value.
        for format_id, video_url in (streams.get('streams') or {}).items():
            video_url = url_or_none(video_url)
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    update_url_query(video_url, {
                        'hdcore': '3.5.0',
                        'plugin': 'aasp-3.5.0.151.81',
                    }), video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                fmt = {
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'ext': ext,
                }
                if video_url.startswith('rtmp'):
                    # Split the RTMP URL into connection URL, app and play path
                    m = re.search(
                        r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
                    if not m:
                        continue
                    fmt.update({
                        'ext': 'flv',
                        'url': m.group('url'),
                        'app': m.group('app'),
                        'play_path': m.group('playpath'),
                        'preference': -1,
                    })
                else:
                    fmt.update({
                        'url': video_url,
                    })
                formats.append(fmt)

        if not formats and video.get('is_geo_blocked'):
            self.raise_geo_restricted(
                'This content might not be available in your country due to copyright reasons',
                metadata_available=True)

        # TODO: webvtt in m3u8
        subtitles = {}
        sami_path = video.get('sami_path')
        if sami_path:
            # Language comes from a `_xx.xml` suffix of the subtitle path,
            # falling back to the last dot-separated label of the URL host.
            lang = self._search_regex(
                r'_([a-z]{2})\.xml', sami_path, 'lang',
                default=urllib.parse.urlparse(url).netloc.rsplit('.', 1)[-1])
            subtitles[lang] = [{
                'url': sami_path,
            }]

        # traverse_obj yields None instead of raising when an intermediate
        # object is missing or null, so optional metadata cannot abort the
        # extraction.
        series = video.get('format_title')
        episode_number = int_or_none(traverse_obj(video, ('format_position', 'episode')))
        season = traverse_obj(video, ('_embedded', 'season', 'title'))
        season_number = int_or_none(traverse_obj(video, ('format_position', 'season')))

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'series': series,
            'episode_number': episode_number,
            'season': season,
            'season_number': season_number,
            'duration': int_or_none(video.get('duration')),
            'timestamp': parse_iso8601(video.get('created_at')),
            'view_count': try_get(video, lambda x: x['views']['total'], int),
            'age_limit': int_or_none(video.get('age_limit', 0)),
            'formats': formats,
            'subtitles': subtitles,
        }
class TVPlayHomeIE(InfoExtractor):
    """Extractor for episode, programme and clip pages on the
    play.tv3.{lv,lt,ee} and tv3play.skaties.lv style hosts matched by
    ``_VALID_URL``, using the site's own ``/api/products`` endpoints.
    """

    _VALID_URL = r'''(?x)
            https?://
            (?:tv3?)?
            play\.(?:tv3|skaties)\.(?P<country>lv|lt|ee)/
            (?P<live>lives/)?
            [^?#&]+(?:episode|programme|clip)-(?P<id>\d+)
    '''
    _TESTS = [{
        'url': 'https://play.tv3.lt/series/gauju-karai-karveliai,serial-2343791/serija-8,episode-2343828',
        'info_dict': {
            'id': '2343828',
            'ext': 'mp4',
            'title': 'Gaujų karai. Karveliai (2021) | S01E08: Serija 8',
            'description': 'md5:f6fcfbb236429f05531131640dfa7c81',
            'duration': 2710,
            'season': 'Gaujų karai. Karveliai',
            'season_number': 1,
            'release_year': 2021,
            'episode': 'Serija 8',
            'episode_number': 8,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://play.tv3.lt/series/moterys-meluoja-geriau-n-7,serial-2574652/serija-25,episode-3284937',
        'info_dict': {
            'id': '3284937',
            'ext': 'mp4',
            'season': 'Moterys meluoja geriau [N-7]',
            'season_number': 14,
            'release_year': 2021,
            'episode': 'Serija 25',
            'episode_number': 25,
            'title': 'Moterys meluoja geriau [N-7] (2021) | S14|E25: Serija 25',
            'description': 'md5:c6926e9710f1a126f028fbe121eddb79',
            'duration': 2440,
        },
        'skip': '404',
    }, {
        'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014',
        'only_matching': True,
    }, {
        'url': 'https://tv3play.skaties.lv/series/women-lie-better-lv,serial-1024464/women-lie-better-lv,episode-1038762',
        'only_matching': True,
    }, {
        'url': 'https://play.tv3.ee/series/_,serial-2654462/_,episode-2654474',
        'only_matching': True,
    }, {
        'url': 'https://tv3play.skaties.lv/clips/tv3-zinas-valsti-lidz-15novembrim-bus-majsede,clip-3464509',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download product metadata and the HLS playlist, then build the info dict."""
        country, is_live, video_id = self._match_valid_url(url).groups()

        # URLs with the `lives/` path segment use a different metadata
        # endpoint and playlist video type than on-demand ones.
        if is_live:
            api_path, video_type = 'lives/programmes', 'CATCHUP'
        else:
            api_path, video_type = 'vods', 'MOVIE'

        metadata_url = urljoin(
            url, f'/api/products/{api_path}/{video_id}?platform=BROWSER&lang={country.upper()}')
        data = self._download_json(metadata_url, video_id)

        # For live programmes the playlist is looked up by the recording id
        # from the metadata rather than by the id in the page URL.
        stream_id = data['programRecordingId'] if is_live else video_id
        playlist_url = urljoin(
            url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER')
        playlist = self._download_json(playlist_url, video_id)

        hls_url = playlist['sources']['HLS'][0]['src']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')

        # Gather every valid thumbnail URL from the image collections;
        # the set drops duplicates.
        thumbnail_urls = set(traverse_obj(
            data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none))
        thumbnails = [{'url': thumbnail_url, 'ext': 'jpg'} for thumbnail_url in thumbnail_urls]

        season_number = int_or_none(traverse_obj(data, ('season', 'number')))
        release_year = int_or_none(traverse_obj(data, ('season', 'serial', 'year')))

        return {
            'id': video_id,
            'title': self._resolve_title(data),
            'description': traverse_obj(data, 'description', 'lead'),
            'duration': int_or_none(data.get('duration')),
            'season': traverse_obj(data, ('season', 'serial', 'title')),
            'season_number': season_number,
            'episode': data.get('title'),
            'episode_number': int_or_none(data.get('episode')),
            'release_year': release_year,
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }

    @staticmethod
    def _resolve_title(data):
        """Return '<serial> (<year>) | SxxEyy: <title>' when all parts are
        available, otherwise the plain ``title`` field of *data*.
        """
        def _series_style_title(item):
            season = item['season']
            serial = season['serial']
            return (
                f'{serial["title"]} ({serial["year"]}) | '
                f'S{season["number"]:02d}E{item["episode"]:02d}: {item["title"]}'
            )

        # try_get yields None when a piece of the composed title is missing,
        # in which case the bare title is used.
        return try_get(data, _series_style_title) or data.get('title')