5 import xml
.etree
.ElementTree
7 from .adobepass
import AdobePassIE
8 from .common
import InfoExtractor
9 from .theplatform
import ThePlatformIE
, default_ns
10 from ..networking
import HEADRequest
34 class NBCIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
35 _VALID_URL
= r
'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
39 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
43 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
44 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
45 'timestamp': 1424246400,
46 'upload_date': '20150218',
47 'uploader': 'NBCU-COM',
48 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
52 'series': 'Tonight Show: Jimmy Fallon',
54 'chapters': 'count:1',
56 'thumbnail': r
're:https?://.+\.jpg',
57 'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
58 'media_type': 'Full Episode',
61 'skip_download': 'm3u8',
65 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
69 'title': 'Star Wars Teaser',
70 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
71 'timestamp': 1417852800,
72 'upload_date': '20141206',
73 'uploader': 'NBCU-COM',
75 'skip': 'page not found',
78 # HLS streams require the 'hdnea3' cookie
79 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
81 'id': '101528f5a9e8127b107e98c5e6ce4638',
84 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
85 'timestamp': 1237100400,
86 'upload_date': '20090315',
87 'uploader': 'NBCU-COM',
89 'skip': 'page not found',
92 # manifest url does not have extension
93 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
97 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
98 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
100 'season': 'Season 75',
102 'series': 'The Golden Globe Awards',
103 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
104 'uploader': 'NBCU-COM',
105 'upload_date': '20180107',
106 'timestamp': 1515312000,
109 'thumbnail': r
're:https?://.+\.jpg',
110 'chapters': 'count:1',
113 'skip_download': 'm3u8',
117 # new video_id format
118 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
120 'id': 'NBCE125189978',
122 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
123 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
124 'uploader': 'NBCU-COM',
125 'series': 'Quantum Leap',
126 'season': 'Season 1',
128 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
132 'timestamp': 1663956155,
133 'upload_date': '20220923',
136 'thumbnail': r
're:https?://.+\.jpg',
137 'categories': ['Series/Quantum Leap 2022'],
138 'media_type': 'Highlight',
141 'skip_download': 'm3u8',
145 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
146 'only_matching': True,
149 # Percent escaped url
150 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
151 'only_matching': True,
155 def _real_extract(self
, url
):
156 permalink
, video_id
= self
._match
_valid
_url
(url
).groups()
157 permalink
= 'http' + urllib
.parse
.unquote(permalink
)
158 video_data
= self
._download
_json
(
159 'https://friendship.nbc.co/v2/graphql', video_id
, query
={
160 'query': '''query bonanzaPage(
161 $app: NBCUBrands! = nbc
164 $platform: SupportedPlatforms! = web
165 $type: EntityPageType! = VIDEO
177 ... on VideoPageData {
193 'variables': json
.dumps({
198 })['data']['bonanzaPage']['metadata']
202 'switch': 'HLSServiceSecure',
204 video_id
= video_data
['mpxGuid']
205 tp_path
= 'NnzsPC/media/guid/{}/{}'.format(video_data
.get('mpxAccountId') or '2410887629', video_id
)
206 tpm
= self
._download
_theplatform
_metadata
(tp_path
, video_id
)
207 title
= tpm
.get('title') or video_data
.get('secondaryTitle')
208 if video_data
.get('locked'):
209 resource
= self
._get
_mvpd
_resource
(
210 video_data
.get('resourceId') or 'nbcentertainment',
211 title
, video_id
, video_data
.get('rating'))
212 query
['auth'] = self
._extract
_mvpd
_auth
(
213 url
, video_id
, 'nbcentertainment', resource
)
214 theplatform_url
= smuggle_url(update_url_query(
215 'http://link.theplatform.com/s/NnzsPC/media/guid/{}/{}'.format(video_data
.get('mpxAccountId') or '2410887629', video_id
),
216 query
), {'force_smil_url': True})
218 # Empty string or 0 can be valid values for these. So the check must be `is None`
219 description
= video_data
.get('description')
220 if description
is None:
221 description
= tpm
.get('description')
222 episode_number
= int_or_none(video_data
.get('episodeNumber'))
223 if episode_number
is None:
224 episode_number
= int_or_none(tpm
.get('nbcu$airOrder'))
225 rating
= video_data
.get('rating')
227 try_get(tpm
, lambda x
: x
['ratings'][0]['rating'])
228 season_number
= int_or_none(video_data
.get('seasonNumber'))
229 if season_number
is None:
230 season_number
= int_or_none(tpm
.get('nbcu$seasonNumber'))
231 series
= video_data
.get('seriesShortTitle')
233 series
= tpm
.get('nbcu$seriesShortTitle')
234 tags
= video_data
.get('keywords')
235 if tags
is None or len(tags
) == 0:
236 tags
= tpm
.get('keywords')
239 '_type': 'url_transparent',
240 'age_limit': parse_age_limit(rating
),
241 'description': description
,
243 'episode_number': episode_number
,
245 'ie_key': 'ThePlatform',
246 'season_number': season_number
,
250 'url': theplatform_url
,
254 class NBCSportsVPlayerIE(InfoExtractor
):
255 _VALID_URL_BASE
= r
'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
256 _VALID_URL
= _VALID_URL_BASE
+ r
'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
257 _EMBED_REGEX
= [rf
'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>{_VALID_URL_BASE}[^\"]+)']
260 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
262 'id': '9CsDKds0kvHI',
264 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
265 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
266 'timestamp': 1426270238,
267 'upload_date': '20150313',
268 'uploader': 'NBCU-SPORTS',
271 'thumbnail': r
're:^https?://.*\.jpg$',
274 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2',
275 'only_matching': True,
277 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
278 'only_matching': True,
def _real_extract(self, url):
    """Hand a vplayer page off to the ThePlatform extractor.

    The ID matched from ``url`` is used as the download label, the page
    is fetched, and the value of its ``tp:releaseUrl="..."`` attribute
    is returned as a URL result tagged with the ``'ThePlatform'`` key.
    """
    clip_id = self._match_id(url)
    # The release URL is embedded in the player markup as an attribute.
    release_url = self._html_search_regex(
        r'tp:releaseUrl="(.+?)"',
        self._download_webpage(url, clip_id),
        'url')
    return self.url_result(release_url, 'ThePlatform')
288 class NBCSportsIE(InfoExtractor
):
289 _VALID_URL
= r
'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
293 'url': 'https://www.nbcsports.com/watch/nfl/profootballtalk/pft-pm/unpacking-addisons-reckless-driving-citation',
295 'id': 'PHJSaFWbrTY9',
297 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
298 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
299 'uploader': 'NBCU-SPORTS',
300 'upload_date': '20150330',
301 'timestamp': 1427726529,
303 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg',
308 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
309 'only_matching': True,
312 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
313 'only_matching': True,
def _real_extract(self, url):
    """Delegate an nbcsports.com article/video page to the vplayer extractor.

    The page is downloaded and passed to
    ``NBCSportsVPlayerIE._extract_url``; whatever that returns is wrapped
    in a URL result tagged with the ``'NBCSportsVPlayer'`` key.
    """
    page_id = self._match_id(url)
    page = self._download_webpage(url, page_id)
    # NOTE(review): presumably _extract_url yields the first embedded
    # vplayer URL found in the page; verify what it returns on no match.
    embed_url = NBCSportsVPlayerIE._extract_url(page)
    return self.url_result(embed_url, 'NBCSportsVPlayer')
323 class NBCSportsStreamIE(AdobePassIE
):
324 _VALID_URL
= r
'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
326 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
330 'title': 'Amgen Tour of California Women\'s Recap',
331 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
335 'skip_download': True,
337 'skip': 'Requires Adobe Pass Authentication',
340 def _real_extract(self
, url
):
341 video_id
= self
._match
_id
(url
)
342 live_source
= self
._download
_json
(
343 f
'http://stream.nbcsports.com/data/live_sources_{video_id}.json',
345 video_source
= live_source
['videoSources'][0]
346 title
= video_source
['title']
348 for k
in ('source', 'msl4source', 'iossource', 'hlsv4'):
350 source_url
= video_source
.get(sk
) or video_source
.get(sk
+ 'Alt')
354 source_url
= video_source
['ottStreamUrl']
355 is_live
= video_source
.get('type') == 'live' or video_source
.get('status') == 'Live'
356 resource
= self
._get
_mvpd
_resource
('nbcsports', title
, video_id
, '')
357 token
= self
._extract
_mvpd
_auth
(url
, video_id
, 'nbcsports', resource
)
358 tokenized_url
= self
._download
_json
(
359 'https://token.playmakerservices.com/cdn',
360 video_id
, data
=json
.dumps({
361 'requestorId': 'nbcsports',
363 'application': 'NBCSports',
365 'platform': 'desktop',
367 'url': video_source
['sourceUrl'],
368 'token': base64
.b64encode(token
.encode()).decode(),
369 'resourceId': base64
.b64encode(resource
.encode()).decode(),
370 }).encode())['tokenizedUrl']
371 formats
= self
._extract
_m
3u8_formats
(tokenized_url
, video_id
, 'mp4')
375 'description': live_source
.get('description'),
381 class NBCNewsIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
382 _VALID_URL
= r
'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
383 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//www\
.nbcnews\
.com
/widget
/video
-embed
/[^
"\']+)\1']
387 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
388 'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate
390 'id': '269389891880',
392 'title': 'How Twitter Reacted To The Snowden Interview',
393 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
394 'timestamp': 1401363060,
395 'upload_date': '20140529',
397 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
401 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
402 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
404 'id': '529953347624',
406 'title': 'FULL EPISODE: Family Business',
407 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
409 'skip': 'This page is unavailable.',
412 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
413 'md5': '40d0e48c68896359c80372306ece0fc3',
415 'id': '394064451844',
417 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
418 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
419 'timestamp': 1423104900,
420 'upload_date': '20150205',
422 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg',
426 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
427 'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939',
431 'title': "Volkswagen U
.S
. Chief
: We
'Totally Screwed Up'",
432 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
433 'upload_date': '20150922',
434 'timestamp': 1442917800,
436 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg',
440 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
441 'md5': '693d1fa21d23afcc9b04c66b227ed9ff',
443 'id': '669831235788',
445 'title': 'See the aurora borealis from space in stunning new NASA video',
446 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
447 'upload_date': '20160420',
448 'timestamp': 1461152093,
450 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg',
454 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
455 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
457 'id': '314487875924',
459 'title': 'The chaotic GOP immigration vote',
460 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
461 'thumbnail': r're:^https?://.*\.jpg$',
462 'timestamp': 1406937606,
463 'upload_date': '20140802',
468 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
469 'only_matching': True,
472 # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
473 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
474 'only_matching': True,
478 def _real_extract(self, url):
479 video_id = self._match_id(url)
480 webpage = self._download_webpage(url, video_id)
482 data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
483 video_data = try_get(data, lambda x: x['video']['current'], dict)
485 video_data = data['article']['content'][0]['primaryMedia']['video']
486 title = video_data['headline']['primary']
489 for va in video_data.get('videoAssets', []):
490 public_url = va.get('publicUrl')
493 if '://link.theplatform.com/' in public_url:
494 public_url = update_url_query(public_url, {'format': 'redirect'})
495 format_id = va.get('format')
496 if format_id == 'M3U':
497 formats.extend(self._extract_m3u8_formats(
498 public_url, video_id, 'mp4', 'm3u8_native',
499 m3u8_id=format_id, fatal=False))
501 tbr = int_or_none(va.get('bitrate'), 1000)
503 'format_id': join_nonempty(format_id, tbr),
505 'width': int_or_none(va.get('width')),
506 'height': int_or_none(va.get('height')),
512 closed_captioning = video_data.get('closedCaptioning')
513 if closed_captioning:
514 for cc_url in closed_captioning.values():
517 subtitles.setdefault('en', []).append({
524 'description': try_get(video_data, lambda x: x['description']['primary']),
525 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
526 'duration': parse_duration(video_data.get('duration')),
527 'timestamp': unified_timestamp(video_data.get('datePublished')),
529 'subtitles': subtitles,
533 class NBCOlympicsIE(InfoExtractor):
534 IE_NAME = 'nbcolympics'
535 _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
538 # Geo-restricted to US
539 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
540 'md5': '54fecf846d05429fbaa18af557ee523a',
542 'id': 'WjTBzDXx5AUq',
543 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
545 'title': 'Rose\'s son Leo was in tears after his dad won gold',
546 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
547 'timestamp': 1471274964,
548 'upload_date': '20160815',
549 'uploader': 'NBCU-SPORTS',
551 'skip': '404 Not Found',
554 def _real_extract(self, url):
555 display_id = self._match_id(url)
557 webpage = self._download_webpage(url, display_id)
560 drupal_settings = self._parse_json(self._search_regex(
561 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
562 webpage, 'drupal settings'), display_id)
564 iframe_url = drupal_settings['vod']['iframe_url']
565 theplatform_url = iframe_url.replace(
566 'vplayer.nbcolympics.com', 'player.theplatform.com')
567 except RegexNotFoundError:
568 theplatform_url = self._search_regex(
569 r"([\"'])embedUrl\1: *([\"'])(?P
<embedUrl
>.+)\
2",
570 webpage, 'embedding URL', group='embedUrl')
573 '_type': 'url_transparent',
574 'url': theplatform_url,
575 'ie_key': ThePlatformIE.ie_key(),
576 'display_id': display_id,
580 class NBCOlympicsStreamIE(AdobePassIE):
581 IE_NAME = 'nbcolympics:stream'
582 _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
585 'note': 'Tokenized m3u8 source URL',
586 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
590 'title': r"re
:Women
's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
593 'skip_download
': 'm3u8
',
595 'skip
': 'Livestream
',
597 'note
': 'Plain m3u8 source URL
',
598 'url
': 'https
://stream
.nbcolympics
.com
/gymnastics
-event
-finals
-mens
-floor
-pommel
-horse
-womens
-vault
-bars
',
602 'title
': r're
:Event Finals
: M Floor
, W Vault
, M Pommel
, W Uneven Bars
[0-9]{4}
-[0-9]{2}
-[0-9]{2}
[0-9]{2}
:[0-9]{2}$
',
605 'skip_download
': 'm3u8
',
607 'skip
': 'Livestream
',
611 def _real_extract(self, url):
612 display_id = self._match_id(url)
613 webpage = self._download_webpage(url, display_id)
614 pid = self._search_regex(r'pid\s
*=\s
*(\d
+);', webpage, 'pid
')
616 event_config = self._download_json(
617 f'http
://stream
.nbcolympics
.com
/data
/event_config_{pid}
.json
',
618 pid, 'Downloading event config
')['eventConfig
']
620 title = event_config['eventTitle
']
621 is_live = {'live
': True, 'replay
': False}.get(event_config.get('eventStatus
'))
623 source_url = self._download_json(
624 f'https
://api
-leap
.nbcsports
.com
/feeds
/assets
/{pid}?application
=NBCOlympics
&platform
=desktop
&format
=nbc
-player
&env
=staging
',
625 pid, 'Downloading leap config
',
626 )['videoSources
'][0]['cdnSources
']['primary
'][0]['sourceUrl
']
628 if event_config.get('cdnToken
'):
629 ap_resource = self._get_mvpd_resource(
630 event_config.get('resourceId
', 'NBCOlympics
'),
631 re.sub(r'[^\w\d
]+', '', event_config['eventTitle
']), pid,
632 event_config.get('ratingId
', 'NO VALUE
'))
633 media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId
', 'NBCOlympics
'), ap_resource)
635 source_url = self._download_json(
636 'https
://tokens
.playmakerservices
.com
/', pid, 'Retrieving tokenized URL
',
638 'application
': 'NBCSports
',
639 'authentication
-type': 'adobe
-pass',
642 'platform
': 'desktop
',
643 'requestorId
': 'NBCOlympics
',
644 'resourceId
': base64.b64encode(ap_resource.encode()).decode(),
645 'token
': base64.b64encode(media_token.encode()).decode(),
649 )['akamai
'][0]['tokenizedUrl
']
651 formats = self._extract_m3u8_formats(source_url, pid, 'mp4
', live=is_live)
653 # -http_seekable requires ffmpeg 4.3+ but it doesn't seem possible to
654 # download with ffmpeg without this option
655 f['downloader_options
'] = {'ffmpeg_args
': ['-seekable
', '0', '-http_seekable
', '0', '-icy
', '0']}
659 'display_id
': display_id,
666 class NBCStationsIE(InfoExtractor):
667 _DOMAIN_RE = '|
'.join(map(re.escape, (
668 'nbcbayarea
', 'nbcboston
', 'nbcchicago
', 'nbcconnecticut
', 'nbcdfw
', 'nbclosangeles
',
669 'nbcmiami
', 'nbcnewyork
', 'nbcphiladelphia
', 'nbcsandiego
', 'nbcwashington
',
670 'necn
', 'telemundo52
', 'telemundoarizona
', 'telemundochicago
', 'telemundonuevainglaterra
',
672 _VALID_URL = rf'https?
://(?
:www\
.)?
(?P
<site
>{_DOMAIN_RE}
)\
.com
/(?
:[^
/?
#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
675 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
679 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
680 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
682 'timestamp': 1661135892,
683 'upload_date': '20220822',
685 'channel_id': 'KNBC',
686 'channel': 'nbclosangeles',
689 'skip_download': 'm3u8',
692 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
696 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
697 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
699 'timestamp': 1660886507,
700 'upload_date': '20220819',
701 'uploader': 'Telemundo Arizona',
702 'channel_id': 'KTAZ',
703 'channel': 'telemundoarizona',
706 'skip_download': 'm3u8',
710 'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
711 'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
715 'title': 'Highs Near Freezing in Boston on Wednesday',
716 'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
718 'timestamp': 1675268656,
719 'upload_date': '20230201',
721 'channel_id': 'WBTS',
722 'channel': 'nbcboston',
734 def _real_extract(self
, url
):
735 channel
, video_id
= self
._match
_valid
_url
(url
).group('site', 'id')
736 webpage
= self
._download
_webpage
(url
, video_id
)
738 nbc_data
= self
._search
_json
(
739 r
'<script>\s*var\s+nbc\s*=', webpage
, 'NBC JSON data', video_id
)
740 pdk_acct
= nbc_data
.get('pdkAcct') or 'Yh1nAC'
741 fw_ssid
= traverse_obj(nbc_data
, ('video', 'fwSSID'))
743 video_data
= self
._search
_json
(
744 r
'data-videos="\[', webpage
, 'video data', video_id
, default
={}, transform_source
=unescapeHTML
)
745 video_data
.update(self
._search
_json
(
746 r
'data-meta="', webpage
, 'metadata', video_id
, default
={}, transform_source
=unescapeHTML
))
748 raise ExtractorError('No video metadata found in webpage', expected
=True)
750 info
, formats
= {}, []
751 is_live
= int_or_none(video_data
.get('mpx_is_livestream')) == 1
753 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
755 'fwsitesection': fw_ssid
,
756 'fwNetworkID': traverse_obj(nbc_data
, ('video', 'fwNetworkID'), default
='382114'),
757 'pprofile': 'ots_desktop_html',
758 'sensitive': 'false',
761 'mode': 'LIVE' if is_live
else 'on-demand',
768 player_id
= traverse_obj(video_data
, ((None, ('video', 'meta')), (
769 'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all
=False)
770 info
['title'] = f
'{channel} livestream'
773 player_id
= traverse_obj(video_data
, (
774 (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all
=False)
776 date_string
= traverse_obj(video_data
, 'date_string', 'date_gmt')
778 date_string
= self
._search
_regex
(
779 r
'datetime="([^"]+)"', date_string
, 'date string', fatal
=False)
781 date_string
= traverse_obj(
782 nbc_data
, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all
=False)
784 video_url
= traverse_obj(video_data
, ((None, ('video', 'meta')), 'mp4_url'), get_all
=False)
786 ext
= determine_ext(video_url
)
787 height
= self
._search
_regex
(r
'\d+-(\d+)p', url_basename(video_url
), 'height', default
=None)
791 'width': int_or_none(self
._RESOLUTIONS
.get(height
)),
792 'height': int_or_none(height
),
793 'format_id': f
'http-{ext}',
797 'title': video_data
.get('title') or traverse_obj(nbc_data
, (
798 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all
=False),
800 traverse_obj(video_data
, 'summary', 'excerpt', 'video_hero_text')
801 or clean_html(traverse_obj(nbc_data
, ('dataLayer', 'summary'))),
802 'timestamp': unified_timestamp(date_string
),
806 if player_id
and fw_ssid
:
807 smil
= self
._download
_xml
(
808 f
'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id
,
809 note
='Downloading SMIL data', query
=query
, fatal
=is_live
)
810 if not isinstance(smil
, xml
.etree
.ElementTree
.Element
):
812 subtitles
= self
._parse
_smil
_subtitles
(smil
, default_ns
) if smil
is not None else {}
813 for video
in smil
.findall(self
._xpath
_ns
('.//video', default_ns
)) if smil
is not None else []:
814 info
['duration'] = float_or_none(remove_end(video
.get('dur'), 'ms'), 1000)
815 video_src_url
= video
.get('src')
816 ext
= mimetype2ext(video
.get('type'), default
=determine_ext(video_src_url
))
818 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
819 video_src_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=is_live
,
820 live
=is_live
, errnote
='No HLS formats found')
822 self
._merge
_subtitles
(subs
, target
=subtitles
)
825 'url': video_src_url
,
826 'format_id': f
'https-{ext}',
828 'width': int_or_none(video
.get('width')),
829 'height': int_or_none(video
.get('height')),
833 self
.raise_no_formats('No video content found in webpage', expected
=True)
836 self
._request
_webpage
(
837 HEADRequest(formats
[0]['url']), video_id
, note
='Checking live status')
838 except ExtractorError
:
839 raise UserNotLive(video_id
=channel
)
844 'channel_id': nbc_data
.get('callLetters'),
845 'uploader': nbc_data
.get('on_air_name'),
847 'subtitles': subtitles
,