3 from .common
import InfoExtractor
9 get_element_html_by_class
,
13 from ..utils
.traversal
import traverse_obj
16 class TV5MondePlusIE(InfoExtractor
):
18 _VALID_URL
= r
'https?://(?:www\.)?tv5monde\.com/tv/video/(?P<id>[^/?#]+)'
21 'url': 'https://www.tv5monde.com/tv/video/65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
22 'md5': 'd2a708902d3df230a357c99701aece05',
24 'id': '3FPa7JMu21_6D4BA7b',
25 'display_id': '65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
27 'title': "Baudouin, l'héritage d'un roi",
28 'thumbnail': 'https://psi.tv5monde.com/upsilon-images/960x540/6f/baudouin-f49c6b0e.jpg',
30 'upload_date': '20240130',
31 'timestamp': 1706641242,
32 'episode': "BAUDOUIN, L'HERITAGE D'UN ROI",
33 'description': 'md5:78125c74a5cac06d7743a2d09126edad',
34 'series': "Baudouin, l'héritage d'un roi",
38 'url': 'https://www.tv5monde.com/tv/video/52952-toute-la-vie-mardi-23-mars-2021',
39 'md5': 'f5e09637cadd55639c05874e22eb56bf',
41 'id': 'obRRZ8m6g9_6D4BA7b',
42 'display_id': '52952-toute-la-vie-mardi-23-mars-2021',
44 'title': 'Toute la vie',
45 'description': 'md5:a824a2e1dfd94cf45fa379a1fb43ce65',
46 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5880553.jpg',
48 'upload_date': '20230721',
49 'timestamp': 1689971646,
50 'series': 'Toute la vie',
51 'episode': 'Mardi 23 mars 2021',
55 'url': 'https://www.tv5monde.com/tv/video/8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
56 'md5': '87cefc34e10a6bf4f7823cccd7b36eb2',
58 'id': 'DOcfvdLKXL_6D4BA7b',
59 'display_id': '8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
61 'title': 'Ce fleuve qui nous charrie',
62 'description': 'md5:62ba3f875343c7fc4082bdfbbc1be992',
63 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5476617.jpg',
65 'upload_date': '20210822',
66 'timestamp': 1629594105,
67 'episode': 'CE FLEUVE QUI NOUS CHARRIE-P001-CE FLEUVE QUI NOUS CHARRIE',
68 'series': 'Ce fleuve qui nous charrie',
72 'url': 'https://www.tv5monde.com/tv/video/70402-tv5monde-le-journal-edition-du-08-05-24-11h',
73 'md5': 'c62977d6d10754a2ecebba70ad370479',
75 'id': 'LgQFrOCNsc_6D4BA7b',
76 'display_id': '70402-tv5monde-le-journal-edition-du-08-05-24-11h',
78 'title': 'TV5MONDE, le journal',
79 'description': 'md5:777dc209eaa4423b678477c36b0b04a8',
80 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/6184105.jpg',
82 'upload_date': '20240508',
83 'timestamp': 1715159640,
84 'series': 'TV5MONDE, le journal',
85 'episode': 'EDITION DU 08/05/24 - 11H',
def _extract_subtitles(data_captions):
    """Build a subtitles mapping from the player's parsed captions data.

    Walks the entries under ``data_captions['files']`` and keeps only those
    whose ``'file'`` value passes ``url_or_none``. Each kept entry is filed
    under its ``'label'``; entries with a missing or empty label are grouped
    under ``'fra'``.

    NOTE(review): the call site invokes this as
    ``self._extract_subtitles(<parsed captions>)`` with a single argument, so
    it is presumably wrapped as a static method on a line outside this view
    -- confirm before moving it.

    Returns a dict mapping each label to a list of ``{'url': <file>}`` dicts.
    """
    # The accumulator must exist before the loop and be handed back to the
    # caller, which stores the result in the 'subtitles' field.
    subtitles = {}
    for f in traverse_obj(data_captions, ('files', lambda _, v: url_or_none(v['file']))):
        subtitles.setdefault(f.get('label') or 'fra', []).append({'url': f['file']})
    return subtitles
97 def _real_extract(self
, url
):
98 display_id
= self
._match
_id
(url
)
99 webpage
= self
._download
_webpage
(url
, display_id
, impersonate
=True)
101 if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage
:
102 self
.raise_geo_restricted(countries
=['FR'])
104 vpl_data
= extract_attributes(self
._search
_regex
(
105 r
'(<[^>]+class="video_player_loader"[^>]+>)',
106 webpage
, 'video player loader'))
108 video_files
= self
._parse
_json
(
109 vpl_data
['data-broadcast'], display_id
)
113 def process_video_files(v
):
116 v_url
= video_file
.get('url')
119 if video_file
.get('type') == 'application/deferred':
120 d_param
= urllib
.parse
.quote(v_url
)
121 token
= video_file
.get('token')
124 deferred_json
= self
._download
_json
(
125 f
'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true',
126 display_id
, 'Downloading deferred info', fatal
=False, impersonate
=True,
127 headers
={'Authorization': f
'Bearer {token}'})
128 v_url
= traverse_obj(deferred_json
, (0, 'url', {url_or_none}
))
131 # data-guid from the webpage isn't stable, use the material id from the json urls
132 video_id
= self
._search
_regex
(
133 r
'materials/([\da-zA-Z]{10}_[\da-fA-F]{7})/', v_url
, 'video id', default
=None)
134 process_video_files(deferred_json
)
136 video_format
= video_file
.get('format') or determine_ext(v_url
)
137 if video_format
== 'm3u8':
138 formats
.extend(self
._extract
_m
3u8_formats
(
139 v_url
, display_id
, 'mp4', 'm3u8_native',
140 m3u8_id
='hls', fatal
=False))
141 elif video_format
== 'mpd':
142 formats
.extend(self
._extract
_mpd
_formats
(
143 v_url
, display_id
, fatal
=False))
147 'format_id': video_format
,
150 process_video_files(video_files
)
152 metadata
= self
._parse
_json
(
153 vpl_data
.get('data-metadata') or '{}', display_id
, fatal
=False)
156 video_id
= self
._search
_regex
(
157 (r
'data-guid=["\']([\da
-f
]{8}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{12}
)',
158 r'id_contenu
["\']\s:\s*(\d+)'), webpage, 'video id',
162 **traverse_obj(metadata, ('content', {
164 'title': ('title', {str}),
165 'episode': ('title', {str}),
166 'series': ('series', {str}),
167 'timestamp': ('publishDate_ts', {int_or_none}),
168 'duration': ('duration', {int_or_none}),
171 'display_id': display_id,
172 'title': clean_html(get_element_by_class('main-title', webpage)),
173 'description': clean_html(get_element_by_class('text', get_element_html_by_class('ep-summary', webpage) or '')),
174 'thumbnail': url_or_none(vpl_data.get('data-image')),
176 'subtitles': self._extract_subtitles(self._parse_json(
177 traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),