4 from .common
import InfoExtractor
17 from ..utils
.traversal
import find_elements
, traverse_obj
# Extractor class for cnn.com pages; it subclasses InfoExtractor.
# NOTE(review): this listing is fragmentary. Statements are split across
# several lines, each group starts with what looks like an original line
# number, and those numbers skip values, so some lines (for example the
# openings of the fixture list and of several dict literals) appear to be
# absent. Confirm every note below against the complete file.
20 class CNNIE(InfoExtractor
):
# URL pattern: an optional edition/www/money/cnnespanol subdomain of cnn.com,
# a negative lookahead rejecting paths that begin with "audio/", and a lazy
# capture of the path as `display_id`, ending at a query string, a fragment,
# the end of the string, or a trailing "/index.html".
21 _VALID_URL
= r
'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
# Expected-metadata fixtures follow, one group per sample URL.
# presumably these are entries of a test-case list whose opening line is not
# shown here — verify against the full file.
24 'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
26 'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
27 'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
29 'upload_date': '20240531',
30 'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
31 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
33 'timestamp': 1717148586,
34 'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
35 'modified_date': '20240531',
36 'modified_timestamp': 1717150140,
39 'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
41 'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
42 'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
44 'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
45 'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
46 'timestamp': 1718158269,
47 'upload_date': '20240612',
48 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
50 'modified_date': '20240612',
51 'modified_timestamp': 1718158509,
# Sample URL ending in "/index.html"; the expected display_id below omits
# that suffix, matching the alternation at the end of _VALID_URL.
54 'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
56 'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
57 'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
59 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
60 'description': 'md5:19f78338ccec533db0fa8a4511012dae',
61 'title': 'Video shows King Charles\' portrait being vandalized by activists',
62 'timestamp': 1718113852,
63 'upload_date': '20240611',
65 'modified_timestamp': 1718116193,
66 'modified_date': '20240611',
69 'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
71 'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
72 'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
74 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
76 'title': 'Robin Meade signs off after HLN\'s last broadcast',
77 'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
78 'upload_date': '20221205',
79 'timestamp': 1670284296,
80 'modified_timestamp': 1670332404,
81 'modified_date': '20221206',
83 'params': {'format': 'direct'},
# Sample on the cnnespanol subdomain.
85 'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
87 'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
89 'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
90 'timestamp': 1729501452,
91 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
92 'description': 'md5:256ee7137d161f776cda429654135e52',
93 'upload_date': '20241021',
95 'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
96 'modified_date': '20241021',
97 'modified_timestamp': 1729501530,
# This sample is followed by two separate groups of expected fields (two
# 'md5' values, two ids); presumably an article page embedding two videos
# that is expected to yield a playlist — TODO confirm, the enclosing
# structure is not shown.
100 'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
102 'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
106 'md5': '073ffab87b8bef97c9913e71cc18ef9e',
108 'id': 'me19d548fdd54df0924087039283128ef473ab397d',
110 'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
111 'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
112 'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
113 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
115 'timestamp': 1729122182,
116 'upload_date': '20241016',
117 'modified_timestamp': 1729194706,
118 'modified_date': '20241017',
120 'params': {'format': 'direct'},
122 'md5': '11604ab4af83b650826753f1ccb8ecff',
124 'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
126 'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
127 'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
128 'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
129 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
131 'timestamp': 1729137765,
132 'upload_date': '20241017',
133 'modified_timestamp': 1729138184,
134 'modified_date': '20241017',
136 'params': {'format': 'direct'},
# Extraction entry point for a URL matched by _VALID_URL.
# Visible steps: take `display_id` from the URL match, download the page,
# read an app id from the page's `window.env` JSON, then for every
# video-player <div> collect direct MP4 formats, closed captions and HLS
# formats, and finally return playlist_result(entries, display_id).
140 def _real_extract(self
, url
):
141 display_id
= self
._match
_valid
_url
(url
).group('display_id')
142 webpage
= self
._download
_webpage
(url
, display_id
)
# App id: the 'TOP_AUTH_SERVICE_APP_ID' string from the JSON object that the
# page assigns to window.env; `default={}` is passed to the JSON search as
# the fallback value.
143 app_id
= traverse_obj(
144 self
._search
_json
(r
'window\.env\s*=', webpage
, 'window env', display_id
, default
={}),
145 ('TOP_AUTH_SERVICE_APP_ID', {str}
))
# Loop over the attribute dicts (extract_attributes) of each
# <div data-component-name="video-player"> found in the page; the trailing
# lambda filters the collected items on v['data-media-id'].
# NOTE(review): `entries` is used further down but no assignment to it is
# visible in this listing — presumably initialised just before this loop.
148 for player_data
in traverse_obj(webpage
, (
149 {find_elements(tag
='div', attr
='data-component-name', value
='video-player', html
=True)},
150 ..., {extract_attributes}
, all
, lambda _
, v
: v
['data-media-id'])):
151 media_id
= player_data
['data-media-id']
# The parent URI is read with .get(), so it may be None.
152 parent_uri
= player_data
.get('data-video-resource-parent-uri')
153 formats
, subtitles
= [], {}
# Request video metadata from the fave.api.cnn.io endpoint (fatal=False is
# passed). 'stellarUri' below looks like one member of the request's query
# dict; the rest of that argument is not shown.
157 video_data
= self
._download
_json
(
158 'https://fave.api.cnn.io/v1/video', media_id
, fatal
=False,
161 'stellarUri': parent_uri
,
# Every files[].fileUri value that passes url_or_none is treated as a direct
# file URL. Resolution and bitrate are parsed from a
# "-<width>x<height>_<number>k.mp4" portion of the URL when the regex
# matches; otherwise both stay None.
163 for direct_url
in traverse_obj(video_data
, ('files', ..., 'fileUri', {url_or_none}
)):
164 resolution
, bitrate
= None, None
165 if mobj
:= re
.search(r
'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url
):
166 resolution
, bitrate
= mobj
.group('res', 'tbr')
# NOTE(review): the next keys look like members of a per-format dict whose
# opening, and the call that stores it in `formats`, are not shown.
169 'format_id': 'direct',
171 'tbr': int_or_none(bitrate
),
172 **parse_resolution(resolution
),
# Closed captions: take the 'track' object of each closedCaptions.types
# entry whose track URL passes url_or_none, and group the tracks by their
# 'lang' value, falling back to 'en' when it is missing or empty.
174 for sub_data
in traverse_obj(video_data
, (
175 'closedCaptions', 'types', lambda _
, v
: url_or_none(v
['track']['url']), 'track')):
176 subtitles
.setdefault(sub_data
.get('lang') or 'en', []).append({
177 'url': sub_data
['url'],
178 'name': sub_data
.get('label'),
# Request the desktop media description from medium.ngtv.io, sending the
# app id as the `appId` query parameter (fatal=False is passed).
182 media_data
= self
._download
_json
(
183 f
'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id
, fatal
=False,
184 query
={'appId': app_id
})
# The manifest URL is read from media -> desktop -> unprotected ->
# unencrypted -> url and validated with url_or_none.
185 m3u8_url
= traverse_obj(media_data
, (
186 'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}
))
# Extract HLS formats and subtitles from the manifest as 'mp4' with format
# id prefix 'hls' (fatal=False is passed).
188 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
189 m3u8_url
, media_id
, 'mp4', m3u8_id
='hls', fatal
=False)
# Merge the HLS subtitles into the dict that already holds the closed
# captions.
191 self
._merge
_subtitles
(subs
, target
=subtitles
)
# Metadata mapped from the player <div>'s data-* attributes. The poster
# override attribute is parsed as JSON, its big.uri value is validated, and
# the URL's query is set to 'c=original'.
194 **traverse_obj(player_data
, {
195 'title': ('data-headline', {clean_html}
),
196 'description': ('data-description', {clean_html}
),
197 'duration': ('data-duration', {parse_duration}
),
198 'timestamp': ('data-publish-date', {parse_iso8601}
),
200 'data-poster-image-override', {json
.loads
}, 'big', 'uri', {url_or_none}
,
201 {update_url(query
='c=original')}),
202 'display_id': 'data-video-slug',
# Metadata mapped from the API response. It is unpacked after the mapping
# above, so for keys present in both (title, description, duration,
# timestamp) these values would take precedence — assuming both unpackings
# sit in the same dict literal, whose opening is not shown.
# The 'uts' values go through int_or_none(scale=1000); presumably
# milliseconds converted to seconds — TODO confirm.
204 **traverse_obj(video_data
, {
205 'timestamp': ('dateCreated', 'uts', {int_or_none(scale
=1000)}),
206 'description': ('description', {clean_html}
),
207 'title': ('headline', {str}
),
208 'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale
=1000)}),
209 'duration': ('trt', {int_or_none}
),
213 'subtitles': subtitles
,
# Exactly one collected entry: the visible fragment sets 'display_id' to the
# page-level display_id (the surrounding statement is not shown).
216 if len(entries
) == 1:
219 'display_id': display_id
,
# Hand all collected entries to playlist_result, using display_id as the
# playlist id argument.
222 return self
.playlist_result(entries
, display_id
)
225 class CNNIndonesiaIE(InfoExtractor
):
226 _VALID_URL
= r
'https?://www\.cnnindonesia\.com/[\w-]+/(?P<upload_date>\d{8})\d+-\d+-(?P<id>\d+)/(?P<display_id>[\w-]+)'
228 'url': 'https://www.cnnindonesia.com/ekonomi/20220909212635-89-845885/alasan-harga-bbm-di-indonesia-masih-disubsidi',
232 'description': 'md5:e7954bfa6f1749bc9ef0c079a719c347',
233 'upload_date': '20220909',
234 'title': 'Alasan Harga BBM di Indonesia Masih Disubsidi',
235 'timestamp': 1662859088,
237 'thumbnail': r
're:https://akcdn\.detik\.net\.id/visual/2022/09/09/thumbnail-ekopedia-alasan-harga-bbm-disubsidi_169\.jpeg',
238 'tags': ['ekopedia', 'subsidi bbm', 'subsidi', 'bbm', 'bbm subsidi', 'harga pertalite naik'],
240 'release_timestamp': 1662859088,
241 'release_date': '20220911',
242 'uploader': 'Asfahan Yahsyi',
245 'url': 'https://www.cnnindonesia.com/internasional/20220911104341-139-846189/video-momen-charles-disambut-meriah-usai-dilantik-jadi-raja-inggris',
249 'upload_date': '20220911',
251 'timestamp': 1662869995,
252 'description': 'md5:ece7b003b3ee7d81c6a5cfede7d5397d',
253 'thumbnail': r
're:https://akcdn\.detik\.net\.id/visual/2022/09/11/thumbnail-video-1_169\.jpeg',
254 'title': 'VIDEO: Momen Charles Disambut Meriah usai Dilantik jadi Raja Inggris',
255 'tags': ['raja charles', 'raja charles iii', 'ratu elizabeth', 'ratu elizabeth meninggal dunia', 'raja inggris', 'inggris'],
257 'release_date': '20220911',
258 'uploader': 'REUTERS',
259 'release_timestamp': 1662869995,
263 def _real_extract(self
, url
):
264 upload_date
, video_id
, display_id
= self
._match
_valid
_url
(url
).group('upload_date', 'id', 'display_id')
265 webpage
= self
._download
_webpage
(url
, display_id
)
267 json_ld_list
= list(self
._yield
_json
_ld
(webpage
, display_id
))
268 json_ld_data
= self
._json
_ld
(json_ld_list
, display_id
)
270 json_ld
.get('embedUrl') for json_ld
in json_ld_list
if json_ld
.get('@type') == 'VideoObject')
272 return merge_dicts(json_ld_data
, {
273 '_type': 'url_transparent',
275 'upload_date': upload_date
,
276 'tags': try_call(lambda: self
._html
_search
_meta
('keywords', webpage
).split(', ')),