4 from .common
import InfoExtractor
5 from ..networking
.exceptions
import HTTPError
class TelecincoBaseIE(InfoExtractor):
    """Base extractor holding the player-content parsing shared by its subclasses."""

    def _parse_content(self, content, url):
        """Resolve one player descriptor into an info dict.

        Parameters:
            content: mapping describing a single video. ``dataMediaId`` and
                ``dataConfig`` are required; ``dataPoster`` and
                ``dataDuration`` are read if present.
            url: address of the page embedding the video; its scheme and host
                are sent as the ``Origin`` header.

        Returns:
            An info dict with the HLS formats, title, thumbnail, duration and
            the HTTP headers to use for playback.

        Raises:
            ExtractorError: re-raised from the token request. A geo-restriction
                error for ES is raised instead when that request fails with
                HTTP 403 and the response body carries error code 4038.
        """
        video_id = content['dataMediaId']
        config = self._download_json(
            content['dataConfig'], video_id, 'Downloading config JSON')
        services = config['services']
        caronte = self._download_json(services['caronte'], video_id)
        if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
            self.report_drm(video_id)

        # Only the first delivery entry is used.
        stream = caronte['dls'][0]['stream']
        headers = {
            # NOTE(review): this entry was not visible in the reviewed excerpt; inferred, confirm.
            'Referer': url,
            'Origin': re.match(r'https?://[^/]+', url).group(0),
        }
        geo_headers = {**headers, **self.geo_verification_headers()}

        try:
            # The CDN query parameters are obtained by posting the ``bbx`` value
            # from the caronte response together with a freshly fetched ``gbx``.
            cdn = self._download_json(
                caronte['cerbero'], video_id, data=json.dumps({
                    'bbx': caronte['bbx'],
                    'gbx': self._download_json(services['gbx'], video_id)['gbx'],
                }).encode(), headers={
                    'Content-Type': 'application/json',
                    # NOTE(review): this entry was not visible in the reviewed excerpt; inferred, confirm.
                    **geo_headers,
                })['tokens']['1']['cdn']
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
                # The 403 body is JSON; its ``code`` field tells geo-blocking
                # apart from other refusals.
                error_code = traverse_obj(
                    self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False),
                    ({json.loads}, 'code', {int}))
                if error_code == 4038:
                    self.raise_geo_restricted(countries=['ES'])
            # Anything that is not the geo-restriction case propagates unchanged;
            # ``cdn`` would otherwise be unbound below.
            raise

        formats = self._extract_m3u8_formats(
            update_url(stream, query=cdn), video_id, 'mp4', m3u8_id='hls', headers=geo_headers)

        return {
            # NOTE(review): this entry was not visible in the reviewed excerpt; inferred, confirm.
            'id': video_id,
            'title': traverse_obj(config, ('info', 'title', {str})),
            'formats': formats,
            # Prefer the poster from the page data; otherwise try the config's
            # ``poster`` and ``imageUrl`` keys as alternatives, in that order.
            'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
                          or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
            'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
            # Playback uses the plain headers, without the geo-verification additions.
            'http_headers': headers,
        }
class TelecincoIE(TelecincoBaseIE):
    """Extractor for article pages on telecinco.es, cuatro.com and mediaset.es."""

    IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
    _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'

    # NOTE(review): only some keys of each entry were visible in the reviewed
    # excerpt. The attribute name and the nesting are inferred, and expected
    # values that were not visible are deliberately not reproduced here;
    # restore them from version control before relying on these tests.
    _TESTS = [{
        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
        'info_dict': {
            'title': 'Bacalao con kokotxas al pil-pil',
            'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
        },
        'playlist': [{
            'md5': '7ee56d665cfd241c0e6d80fd175068b0',
            'info_dict': {
                'id': 'JEA5ijCnF6p5W08A1rNKn7',
                'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
            },
        }],
        'skip': 'HTTP Error 410 Gone',
    }, {
        'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
        'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
        'info_dict': {
            'id': 'jn24Od1zGLG4XUZcnUnZB6',
            'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
            'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
        },
        'skip': 'Redirects to main page',
    }, {
        'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
        'md5': '5ce057f43f30b634fbaf0f18c71a140a',
        'info_dict': {
            'id': 'aywerkD2Sv1vGNqq9b85Q2',
            'title': '#DOYLACARA. Con la trata no hay trato',
            'thumbnail': 'https://album.mediaset.es/eimg/2017/11/02/1tlQLO5Q3mtKT24f3EaC24.jpg',
        },
    }, {
        # video in opening's content
        'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
        'info_dict': {
            'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
            'description': r're:Fiorella, la sobrina de Edmundo Arrocet, concedió .{727}',
        },
        'playlist': [{
            'md5': 'adb28c37238b675dad0f042292f209a7',
            'info_dict': {
                'id': 'TpI2EttSDAReWpJ1o0NVh2',
                'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
                'thumbnail': 'https://album.mediaset.es/eimg/2020/02/29/5opaC37lUhKlZ7FoDhiVC.jpg',
            },
        }],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
        'only_matching': True,
    }, {
        'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
        'only_matching': True,
    }, {
        'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a single video or a playlist from an article page.

        The article data is read from the JSON assigned to
        ``window.$REACTBASE_STATE.article`` (or ``article_multisite``) in the
        page. Articles whose ``editorialType`` is not ``'VID'`` yield a playlist
        of every video block found in the opening and the body; otherwise the
        opening's content is returned as a single video.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        article = self._search_json(
            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
            webpage, 'article', display_id)['article']
        # The trailing ``filter`` step discards an empty cleaned paragraph.
        description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))

        if article.get('editorialType') != 'VID':
            entries = []

            # Walk the opening block and every body block that has content.
            for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
                content = p['content']
                type_ = p.get('type')
                if type_ == 'paragraph' and isinstance(content, str):
                    # Text paragraphs extend the description, joined without a separator.
                    description = join_nonempty(description, content, delim='')
                elif type_ == 'video' and isinstance(content, dict):
                    entries.append(self._parse_content(content, url))

            return self.playlist_result(
                entries, str_or_none(article.get('id')),
                traverse_obj(article, ('title', {str})), clean_html(description))

        info = self._parse_content(article['opening']['content'], url)
        info['description'] = description
        # NOTE(review): this return was not visible in the reviewed excerpt; inferred, confirm.
        return info