3 from .common
import InfoExtractor
4 from ..networking
import HEADRequest
26 class RaiBaseIE(InfoExtractor
):
27 _UUID_RE
= r
'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
28 _GEO_COUNTRIES
= ['IT']
31 def _fix_m3u8_formats(self
, media_url
, video_id
):
32 fmts
= self
._extract
_m
3u8_formats
(
33 media_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False)
35 # Fix malformed m3u8 manifests by setting audio-only/video-only formats
37 if not f
.get('acodec'):
39 if not f
.get('vcodec'):
42 if re
.search(r
'chunklist(?:_b\d+)*_ao[_.]', man_url
): # audio only
44 elif re
.search(r
'chunklist(?:_b\d+)*_vo[_.]', man_url
): # video only
47 if f
['acodec'] == 'none':
49 if f
['vcodec'] == 'none':
54 def _extract_relinker_info(self
, relinker_url
, video_id
, audio_only
=False):
56 # remove \r\n\t before and after <![CDATA[ ]]> to avoid
57 # polluted text with xpath_text
58 s
= re
.sub(r
'(\]\]>)[\r\n\t]+(</)', '\\1\\2', s
)
59 return re
.sub(r
'(>)[\r\n\t]+(<!\[CDATA\[)', '\\1\\2', s
)
61 if not re
.match(r
'https?://', relinker_url
):
62 return {'formats': [{'url': relinker_url
}]}
64 # set User-Agent to generic 'Rai' to avoid quality filtering from
65 # the media server and get the maximum qualities available
66 relinker
= self
._download
_xml
(
67 relinker_url
, video_id
, note
='Downloading XML metadata',
68 transform_source
=fix_cdata
, query
={'output': 64},
69 headers
={**self
.geo_verification_headers(), 'User-Agent': 'Rai'})
71 if xpath_text(relinker
, './license_url', default
='{}') != '{}':
72 self
.report_drm(video_id
)
74 is_live
= xpath_text(relinker
, './is_live', default
='N') == 'Y'
75 duration
= parse_duration(xpath_text(relinker
, './duration', default
=None))
76 media_url
= xpath_text(relinker
, './url[@type="content"]', default
=None)
79 self
.raise_no_formats('The relinker returned no media url')
81 # geo flag is a bit unreliable and not properly set all the time
82 geoprotection
= xpath_text(relinker
, './geoprotection', default
='N') == 'Y'
84 ext
= determine_ext(media_url
)
92 'format_id': 'https-mp3',
94 elif ext
== 'm3u8' or 'format=m3u8' in media_url
:
95 formats
.extend(self
._fix
_m
3u8_formats
(media_url
, video_id
))
97 # very likely no longer needed. Cannot find any url that uses it.
98 manifest_url
= update_url_query(
99 media_url
.replace('manifest#live_hds.f4m', 'manifest.f4m'),
100 {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
101 formats
.extend(self
._extract
_f
4m
_formats
(
102 manifest_url
, video_id
, f4m_id
='hds', fatal
=False))
104 bitrate
= int_or_none(xpath_text(relinker
, './bitrate'))
107 'tbr': bitrate
if bitrate
> 0 else None,
108 'format_id': join_nonempty('https', bitrate
, delim
='-'),
111 raise ExtractorError('Unrecognized media file found')
113 if (not formats
and geoprotection
is True) or '/video_no_available.mp4' in media_url
:
114 self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
, metadata_available
=True)
116 if not audio_only
and not is_live
:
117 formats
.extend(self
._create
_http
_urls
(media_url
, relinker_url
, formats
, video_id
))
121 'duration': duration
,
125 def _create_http_urls(self
, manifest_url
, relinker_url
, fmts
, video_id
):
126 _MANIFEST_REG
= r
'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
127 _MP4_TMPL
= '%s&overrideUserAgentRule=mp4-%s'
145 def percentage(number
, target
, pc
=20, roof
=125):
146 """check if the target is in the range of number +/- percent"""
147 if not number
or number
< 0:
149 return abs(target
- number
) < min(float(number
) * float(pc
) / 100.0, roof
)
151 def get_format_info(tbr
):
153 br
= int_or_none(tbr
)
154 if len(fmts
) == 1 and not br
:
155 br
= fmts
[0].get('tbr')
157 tbr
= math
.floor(br
/ 100) * 100
161 # try extracting info from available m3u8 formats
162 format_copy
= [None, None]
165 if percentage(tbr
, f
['tbr']):
166 format_copy
[0] = f
.copy()
167 if [f
.get('width'), f
.get('height')] == _QUALITY
.get(tbr
):
168 format_copy
[1] = f
.copy()
169 format_copy
[1]['tbr'] = tbr
171 # prefer format with similar bitrate because there might be
172 # multiple video with the same resolution but different bitrate
173 format_copy
= format_copy
[0] or format_copy
[1] or {}
175 'format_id': f
'https-{tbr}',
176 'width': format_copy
.get('width'),
177 'height': format_copy
.get('height'),
178 'tbr': format_copy
.get('tbr') or tbr
,
179 'vcodec': format_copy
.get('vcodec') or 'avc1',
180 'acodec': format_copy
.get('acodec') or 'mp4a',
181 'fps': format_copy
.get('fps') or 25,
182 } if format_copy
else {
183 'format_id': f
'https-{tbr}',
184 'width': _QUALITY
[tbr
][0],
185 'height': _QUALITY
[tbr
][1],
192 # Check if MP4 download is available
194 self
._request
_webpage
(
195 HEADRequest(_MP4_TMPL
% (relinker_url
, '*')), video_id
, 'Checking MP4 availability')
196 except ExtractorError
as e
:
197 self
.to_screen(f
'{video_id}: MP4 direct download is not available: {e.cause}')
200 # filter out single-stream formats
201 fmts
= [f
for f
in fmts
202 if f
.get('vcodec') != 'none' and f
.get('acodec') != 'none']
204 mobj
= re
.search(_MANIFEST_REG
, manifest_url
)
207 available_qualities
= mobj
.group('quality').split(',') if mobj
.group('quality') else ['*']
210 for q
in filter(None, available_qualities
):
211 self
.write_debug(f
'Creating https format for quality {q}')
213 'url': _MP4_TMPL
% (relinker_url
, q
),
216 **get_format_info(q
),
221 def _get_thumbnails_list(thumbs
, url
):
223 'url': urljoin(url
, thumb_url
),
224 } for thumb_url
in (thumbs
or {}).values() if thumb_url
]
227 def _extract_subtitles(url
, video_data
):
231 subtitles_array
= video_data
.get('subtitlesArray') or video_data
.get('subtitleList') or []
232 for k
in ('subtitles', 'subtitlesUrl'):
233 subtitles_array
.append({'url': video_data
.get(k
)})
234 for subtitle
in subtitles_array
:
235 sub_url
= subtitle
.get('url')
236 if sub_url
and isinstance(sub_url
, str):
237 sub_lang
= subtitle
.get('language') or 'it'
238 sub_url
= urljoin(url
, sub_url
)
239 sub_ext
= determine_ext(sub_url
, SRT_EXT
)
240 subtitles
.setdefault(sub_lang
, []).append({
244 if STL_EXT
== sub_ext
:
245 subtitles
[sub_lang
].append({
247 'url': sub_url
[:-len(STL_EXT
)] + SRT_EXT
,
252 class RaiPlayIE(RaiBaseIE
):
253 _VALID_URL
= rf
'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)'
255 'url': 'https://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
256 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
258 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
260 'title': 'Report del 07/04/2014',
261 'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014',
262 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
263 'thumbnail': r
're:^https?://www\.raiplay\.it/.+\.jpg',
269 'subtitles': {'it': 'count:4'},
270 'release_year': 2024,
271 'episode': 'Espresso nel caffè - 07/04/2014',
272 'timestamp': 1396919880,
273 'upload_date': '20140408',
274 'formats': 'count:4',
276 'params': {'skip_download': True},
279 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
280 'md5': 'aeda7243115380b2dd5e881fd42d949a',
282 'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736',
284 'title': 'Blanca - S1E1 - Senza occhi',
285 'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi',
286 'description': 'md5:75f95d5c030ec8bac263b1212322e28c',
287 'thumbnail': r
're:^https://www\.raiplay\.it/dl/img/.+\.jpg',
288 'uploader': 'Rai Premium',
289 'creator': 'Rai Fiction',
292 'season': 'Season 1',
294 'release_year': 2021,
296 'episode': 'Senza occhi',
297 'timestamp': 1637318940,
298 'upload_date': '20211119',
299 'formats': 'count:7',
301 'params': {'skip_download': True},
302 'expected_warnings': ['Video not available. Likely due to geo-restriction.'],
305 'url': 'https://www.raiplay.it/video/2012/09/S1E11---Tutto-cio-che-luccica-0cab3323-732e-45d6-8e86-7704acab6598.html',
306 'md5': 'a634d20e8ab2d43724c273563f6bf87a',
308 'id': '0cab3323-732e-45d6-8e86-7704acab6598',
310 'title': 'Mia and Me - S1E11 - Tutto ciò che luccica',
311 'alt_title': 'St 1 Ep 11 - Mia and Me - Tutto ciò che luccica',
312 'description': 'md5:4969e594184b1920c4c1f2b704da9dea',
313 'thumbnail': r
're:^https?://.*\.jpg$',
314 'uploader': 'Rai Gulp',
315 'series': 'Mia and Me',
316 'season': 'Season 1',
317 'episode_number': 11,
318 'release_year': 2015,
320 'episode': 'Tutto ciò che luccica',
321 'timestamp': 1348495020,
322 'upload_date': '20120924',
325 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
326 'only_matching': True,
328 # subtitles at 'subtitlesArray' key (see #27698)
329 'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
330 'only_matching': True,
333 'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html',
334 'only_matching': True,
337 def _real_extract(self
, url
):
338 base
, video_id
= self
._match
_valid
_url
(url
).groups()
340 media
= self
._download
_json
(
341 f
'{base}.json', video_id
, 'Downloading video JSON')
343 if not self
.get_param('allow_unplayable_formats'):
344 if traverse_obj(media
, (('program_info', None), 'rights_management', 'rights', 'drm')):
345 self
.report_drm(video_id
)
347 video
= media
['video']
348 relinker_info
= self
._extract
_relinker
_info
(video
['content_url'], video_id
)
349 date_published
= join_nonempty(
350 media
.get('date_published'), media
.get('time_published'), delim
=' ')
351 season
= media
.get('season')
352 alt_title
= join_nonempty(media
.get('subtitle'), media
.get('toptitle'), delim
=' - ')
355 'id': remove_start(media
.get('id'), 'ContentItem-') or video_id
,
356 'display_id': video_id
,
357 'title': media
.get('name'),
358 'alt_title': strip_or_none(alt_title
or None),
359 'description': media
.get('description'),
360 'uploader': strip_or_none(
361 traverse_obj(media
, ('program_info', 'channel'))
362 or media
.get('channel') or None),
363 'creator': strip_or_none(
364 traverse_obj(media
, ('program_info', 'editor'))
365 or media
.get('editor') or None),
366 'duration': parse_duration(video
.get('duration')),
367 'timestamp': unified_timestamp(date_published
),
368 'thumbnails': self
._get
_thumbnails
_list
(media
.get('images'), url
),
369 'series': traverse_obj(media
, ('program_info', 'name')),
370 'season_number': int_or_none(season
),
371 'season': season
if (season
and not season
.isdigit()) else None,
372 'episode': media
.get('episode_title'),
373 'episode_number': int_or_none(media
.get('episode')),
374 'subtitles': self
._extract
_subtitles
(url
, video
),
375 'release_year': int_or_none(traverse_obj(media
, ('track_info', 'edit_year'))),
380 class RaiPlayLiveIE(RaiPlayIE
): # XXX: Do not subclass from concrete IE
381 _VALID_URL
= r
'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
383 'url': 'http://www.raiplay.it/dirette/rainews24',
385 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
386 'display_id': 'rainews24',
388 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
389 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
390 'uploader': 'Rai News 24',
391 'creator': 'Rai News 24',
393 'live_status': 'is_live',
394 'upload_date': '20090502',
395 'timestamp': 1241276220,
396 'formats': 'count:3',
398 'params': {'skip_download': True},
402 class RaiPlayPlaylistIE(InfoExtractor
):
403 _VALID_URL
= r
'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
405 # entire series episodes + extras...
406 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/',
408 'id': 'nondirloalmiocapo',
409 'title': 'Non dirlo al mio capo',
410 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
412 'playlist_mincount': 30,
415 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/',
417 'id': 'nondirloalmiocapo',
418 'title': 'Non dirlo al mio capo - Stagione 2',
419 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
421 'playlist_count': 12,
424 def _real_extract(self
, url
):
425 base
, playlist_id
, extra_id
= self
._match
_valid
_url
(url
).groups()
427 program
= self
._download
_json
(
428 f
'{base}.json', playlist_id
, 'Downloading program JSON')
431 extra_id
= extra_id
.upper().rstrip('/')
433 playlist_title
= program
.get('name')
435 for b
in (program
.get('blocks') or []):
436 for s
in (b
.get('sets') or []):
438 if extra_id
!= join_nonempty(
439 b
.get('name'), s
.get('name'), delim
='/').replace(' ', '-').upper():
441 playlist_title
= join_nonempty(playlist_title
, s
.get('name'), delim
=' - ')
446 medias
= self
._download
_json
(
447 f
'{base}/{s_id}.json', s_id
,
448 'Downloading content set JSON', fatal
=False)
451 for m
in (medias
.get('items') or []):
452 path_id
= m
.get('path_id')
455 video_url
= urljoin(url
, path_id
)
456 entries
.append(self
.url_result(
457 video_url
, ie
=RaiPlayIE
.ie_key(),
458 video_id
=RaiPlayIE
._match
_id
(video_url
)))
460 return self
.playlist_result(
461 entries
, playlist_id
, playlist_title
,
462 try_get(program
, lambda x
: x
['program_info']['description']))
465 class RaiPlaySoundIE(RaiBaseIE
):
466 _VALID_URL
= rf
'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)'
468 'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html',
469 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
471 'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707',
473 'title': 'Il Ruggito del Coniglio del 10/12/2021',
474 'alt_title': 'md5:0e6476cd57858bb0f3fcc835d305b455',
475 'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2',
476 'thumbnail': r
're:^https?://.+\.jpg$',
477 'uploader': 'rai radio 2',
479 'series': 'Il Ruggito del Coniglio',
480 'episode': 'Il Ruggito del Coniglio del 10/12/2021',
481 'creator': 'rai radio 2',
482 'timestamp': 1638346620,
483 'upload_date': '20211201',
485 'params': {'skip_download': True},
488 def _real_extract(self
, url
):
489 base
, audio_id
= self
._match
_valid
_url
(url
).group('base', 'id')
490 media
= self
._download
_json
(f
'{base}.json', audio_id
, 'Downloading audio JSON')
491 uid
= try_get(media
, lambda x
: remove_start(remove_start(x
['uniquename'], 'ContentItem-'), 'Page-'))
495 relinkers
= set(traverse_obj(media
, (('downloadable_audio', 'audio', ('live', 'cards', 0, 'audio')), 'url')))
497 info
= self
._extract
_relinker
_info
(r
, audio_id
, True)
498 formats
.extend(info
.get('formats'))
500 date_published
= try_get(media
, (lambda x
: f
'{x["create_date"]} {x.get("create_time") or ""}',
501 lambda x
: x
['live']['create_date']))
503 podcast_info
= traverse_obj(media
, 'podcast_info', ('live', 'cards', 0)) or {}
507 'id': uid
or audio_id
,
508 'display_id': audio_id
,
509 'title': traverse_obj(media
, 'title', 'episode_title'),
510 'alt_title': traverse_obj(media
, ('track_info', 'media_name'), expected_type
=strip_or_none
),
511 'description': media
.get('description'),
512 'uploader': traverse_obj(media
, ('track_info', 'channel'), expected_type
=strip_or_none
),
513 'creator': traverse_obj(media
, ('track_info', 'editor'), expected_type
=strip_or_none
),
514 'timestamp': unified_timestamp(date_published
),
515 'thumbnails': self
._get
_thumbnails
_list
(podcast_info
.get('images'), url
),
516 'series': podcast_info
.get('title'),
517 'season_number': int_or_none(media
.get('season')),
518 'episode': media
.get('episode_title'),
519 'episode_number': int_or_none(media
.get('episode')),
524 class RaiPlaySoundLiveIE(RaiPlaySoundIE
): # XXX: Do not subclass from concrete IE
525 _VALID_URL
= r
'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)'
527 'url': 'https://www.raiplaysound.it/radio2',
529 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44',
530 'display_id': 'radio2',
532 'title': r
're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+',
533 'thumbnail': r
're:^https://www\.raiplaysound\.it/dl/img/.+\.png',
534 'uploader': 'rai radio 2',
535 'series': 'Rai Radio 2',
536 'creator': 'raiplaysound',
538 'live_status': 'is_live',
540 'params': {'skip_download': True},
544 class RaiPlaySoundPlaylistIE(InfoExtractor
):
545 _VALID_URL
= r
'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
548 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio',
550 'id': 'ilruggitodelconiglio',
551 'title': 'Il Ruggito del Coniglio',
552 'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e',
554 'playlist_mincount': 65,
557 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995',
559 'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995',
560 'title': 'Prima Stagione 1995',
565 def _real_extract(self
, url
):
566 base
, playlist_id
, extra_id
= self
._match
_valid
_url
(url
).group('base', 'id', 'extra_id')
568 program
= self
._download
_json
(url
, playlist_id
, 'Downloading program JSON')
571 extra_id
= extra_id
.rstrip('/')
572 playlist_id
+= '_' + extra_id
.replace('/', '_')
573 path
= next(c
['path_id'] for c
in program
.get('filters') or [] if extra_id
in c
.get('weblink'))
574 program
= self
._download
_json
(
575 urljoin('https://www.raiplaysound.it', path
), playlist_id
, 'Downloading program secondary JSON')
578 self
.url_result(urljoin(base
, c
['path_id']), ie
=RaiPlaySoundIE
.ie_key())
579 for c
in traverse_obj(program
, 'cards', ('block', 'cards')) or []
582 return self
.playlist_result(entries
, playlist_id
, program
.get('title'),
583 traverse_obj(program
, ('podcast_info', 'description')))
586 class RaiIE(RaiBaseIE
):
587 _VALID_URL
= rf
'https?://[^/]+\.(?:rai\.(?:it|tv))/.+?-(?P<id>{RaiBaseIE._UUID_RE})(?:-.+?)?\.html'
589 'url': 'https://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
591 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
593 'title': 'TG PRIMO TEMPO',
594 'thumbnail': r
're:^https?://.*\.jpg',
596 'upload_date': '20140612',
598 'params': {'skip_download': True},
599 'expected_warnings': ['Video not available. Likely due to geo-restriction.'],
601 'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
603 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
605 'title': 'TG1 ore 20:00 del 03/11/2016',
606 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
607 'thumbnail': r
're:^https?://.*\.jpg$',
609 'upload_date': '20161103',
611 'params': {'skip_download': True},
613 # Direct MMS: Media URL no longer works.
614 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
615 'only_matching': True,
618 def _real_extract(self
, url
):
619 content_id
= self
._match
_id
(url
)
620 media
= self
._download
_json
(
621 f
'https://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-{content_id}.html?json',
622 content_id
, 'Downloading video JSON', fatal
=False, expected_status
=404)
627 if 'Audio' in media
['type']:
630 'format_id': join_nonempty('https', media
.get('formatoAudio'), delim
='-'),
631 'url': media
['audioUrl'],
632 'ext': media
.get('formatoAudio'),
634 'acodec': media
.get('formatoAudio'),
637 elif 'Video' in media
['type']:
638 relinker_info
= self
._extract
_relinker
_info
(media
['mediaUri'], content_id
)
640 raise ExtractorError('not a media file')
642 thumbnails
= self
._get
_thumbnails
_list
(
643 {image_type
: media
.get(image_type
) for image_type
in (
644 'image', 'image_medium', 'image_300')}, url
)
648 'title': strip_or_none(media
.get('name') or media
.get('title')),
649 'description': strip_or_none(media
.get('desc')) or None,
650 'thumbnails': thumbnails
,
651 'uploader': strip_or_none(media
.get('author')) or None,
652 'upload_date': unified_strdate(media
.get('date')),
653 'duration': parse_duration(media
.get('length')),
654 'subtitles': self
._extract
_subtitles
(url
, media
),
659 class RaiNewsIE(RaiBaseIE
):
660 _VALID_URL
= rf
'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
661 _EMBED_REGEX
= [rf
'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
663 # new rainews player (#3911)
664 'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html',
666 'id': '31e8017c-845c-43f5-9c48-245b43c3a079',
668 'title': 'md5:1e81364b09de4a149042bac3c7d36f0b',
670 'upload_date': '20240225',
671 'uploader': 'rainews',
672 'formats': 'count:2',
674 'params': {'skip_download': True},
676 # old content with fallback method to extract media urls
677 'url': 'https://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
679 'id': '1632c009-c843-4836-bb65-80c33084a64b',
681 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
682 'description': 'I film in uscita questa settimana.',
683 'thumbnail': r
're:^https?://.*\.png$',
685 'upload_date': '20161103',
686 'formats': 'count:8',
688 'params': {'skip_download': True},
689 'expected_warnings': ['unable to extract player_data'],
692 'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html',
693 'only_matching': True,
697 def _real_extract(self
, url
):
698 video_id
= self
._match
_id
(url
)
700 webpage
= self
._download
_webpage
(url
, video_id
)
702 player_data
= self
._search
_json
(
703 rf
'<rai{self._PLAYER_TAG}-player\s*data=\'', webpage
, 'player_data', video_id
,
704 transform_source
=clean_html
, default
={})
705 track_info
= player_data
.get('track_info')
706 relinker_url
= traverse_obj(player_data
, 'mediapolis', 'content_url')
709 # fallback on old implementation for some old content
711 return RaiIE
._real
_extract
(self
, url
)
712 except GeoRestrictedError
:
714 except ExtractorError
as e
:
715 raise ExtractorError('Relinker URL not found', cause
=e
)
717 relinker_info
= self
._extract
_relinker
_info
(urljoin(url
, relinker_url
), video_id
)
721 'title': player_data
.get('title') or track_info
.get('title') or self
._og
_search
_title
(webpage
),
722 'upload_date': unified_strdate(track_info
.get('date')),
723 'uploader': strip_or_none(track_info
.get('editor') or None),
728 class RaiCulturaIE(RaiNewsIE
): # XXX: Do not subclass from concrete IE
729 _VALID_URL
= rf
'https?://(www\.)?raicultura\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
730 _EMBED_REGEX
= [rf
'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
732 'url': 'https://www.raicultura.it/letteratura/articoli/2018/12/Alberto-Asor-Rosa-Letteratura-e-potere-05ba8775-82b5-45c5-a89d-dd955fbde1fb.html',
734 'id': '05ba8775-82b5-45c5-a89d-dd955fbde1fb',
736 'title': 'Alberto Asor Rosa: Letteratura e potere',
738 'upload_date': '20181206',
739 'uploader': 'raicultura',
740 'formats': 'count:2',
742 'params': {'skip_download': True},
744 _PLAYER_TAG
= 'cultura'
747 class RaiSudtirolIE(RaiBaseIE
):
748 _VALID_URL
= r
'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)'
751 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
753 'id': 'Ptv1619729460',
755 'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51',
756 'series': 'Euro: trasmisciun d\'economia',
757 'upload_date': '20210429',
758 'thumbnail': r
're:https://raisudtirol\.rai\.it/img/.+\.jpg',
759 'uploader': 'raisudtirol',
760 'formats': 'count:1',
762 'params': {'skip_download': True},
765 'url': 'https://raisudtirol.rai.it/it/kidsplayer.php?lang=it&media=GUGGUG_P1.smil',
769 'title': 'GUGGUG! La Prospettiva - Die Perspektive',
770 'uploader': 'raisudtirol',
771 'formats': 'count:6',
773 'params': {'skip_download': True},
776 def _real_extract(self
, url
):
777 video_id
= self
._match
_id
(url
)
778 webpage
= self
._download
_webpage
(url
, video_id
)
780 video_date
= self
._html
_search
_regex
(
781 r
'<span class="med_data">(.+?)</span>', webpage
, 'video_date', default
=None)
782 video_title
= self
._html
_search
_regex
([
783 r
'<span class="med_title">(.+?)</span>', r
'title: \'(.+?
)\','],
784 webpage, 'video_title
', default=None)
785 video_url = self._html_search_regex([
786 r'sources
:\s
*\
[\
{file:\s
*"(.+?)"\
}\
]',
787 r'<source\s
+src
="(.+?)"\s
+type="application/x-mpegURL"'],
788 webpage, 'video_url
', default=None)
790 ext = determine_ext(video_url)
792 formats = self._extract_m3u8_formats(video_url, video_id)
795 'format_id
': 'https
-mp4
',
796 'url
': self._proto_relative_url(video_url),
805 self.raise_no_formats(f'Unrecognized media
file: {video_url}
')
809 'title
': join_nonempty(video_title, video_date, delim=' - '),
810 'series
': video_title if video_date else None,
811 'upload_date
': unified_strdate(video_date),
812 'thumbnail
': urljoin('https
://raisudtirol
.rai
.it
/', self._html_search_regex(
813 r'image
: \'(.+?
)\'', webpage, 'video_thumb
', default=None)),
814 'uploader
': 'raisudtirol
',