import re

from .common import InfoExtractor
from ..utils import extract_attributes
class TheSunIE(InfoExtractor):
    """Extract the Brightcove videos embedded in an article on The Sun.

    The article page is downloaded and scanned for ``<video>`` tags that
    carry a ``data-video-id-pending`` attribute.  Each such tag is turned
    into a URL result handed to the ``BrightcoveNew`` extractor, and all of
    them are returned together as one playlist keyed by the article id.
    """

    # ``the-?sun`` followed by ``.co.uk`` or ``.com``; the numeric path
    # segment after the section name is captured as the article id.
    _VALID_URL = r'https?://(?:www\.)?the-?sun(\.co\.uk|\.com)/[^/]+/(?P<id>\d+)'
    # NOTE(review): only the 'url' and 'title' expectations were present in
    # the text under review.  The 'id' values are derived from the URLs via
    # _VALID_URL; any playlist-size expectations could not be recovered --
    # TODO confirm against the upstream test definitions.
    _TESTS = [{
        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
        'info_dict': {
            'id': '2261604',
            'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
        },
    }, {
        'url': 'https://www.the-sun.com/entertainment/7611415/1000lb-sisters-fans-rip-amy-dangerous-health-decision/',
        'info_dict': {
            'id': '7611415',
            'title': 'md5:e0b9b976f79dc770e5c80f22f40bb844',
        },
    }]
    # Filled with (account id, video id).
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a playlist of the Brightcove videos found in the article.

        :param url: article URL matching ``_VALID_URL``.
        :returns: the result of ``self.playlist_result`` for the collected
            entries, using the article id as playlist id and the page's
            Open Graph title (looked up with ``fatal=False``) as title.
        """
        article_id = self._match_id(url)

        webpage = self._download_webpage(url, article_id)

        entries = []
        for video in re.findall(
                r'<video[^>]+data-video-id-pending=[^>]+>',
                webpage):
            attrs = extract_attributes(video)
            video_id = attrs.get('data-video-id-pending')
            if not video_id:
                # A blank or unparsable id would only produce a player URL
                # with an empty videoId, so skip the tag.
                continue
            # Tags without an explicit account fall back to this account id.
            account_id = attrs.get('data-account', '5067014667001')
            entries.append(self.url_result(
                self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
                'BrightcoveNew', video_id))

        return self.playlist_result(
            entries, article_id, self._og_search_title(webpage, fatal=False))