from .common import InfoExtractor
from .kaltura import KalturaIE
class IncIE(InfoExtractor):
    """Resolve inc.com article pages to the Kaltura entry they embed.

    The extractor downloads the article page, reads a Kaltura partner id and
    a Kaltura entry id out of the markup, and delegates the actual extraction
    to ``KalturaIE`` through a ``kaltura:<partner_id>:<entry_id>`` URL.
    """

    # The dot before ``html`` is escaped so that only a literal ``.html``
    # suffix matches; unescaped, it would accept any character there
    # (e.g. ``.../foo-html``).
    _VALID_URL = r'https?://(?:www\.)?inc\.com/(?:[^/]+/)+(?P<id>[^.]+)\.html'

    # NOTE(review): this table was rebuilt from a partially visible listing.
    # The 'info_dict' / 'params' nesting is assumed from the gaps in the
    # original, and the 'id' / 'ext' entries of each 'info_dict' were not
    # visible -- restore them from the authoritative copy before relying on
    # these tests.
    _TESTS = [{
        'url': 'http://www.inc.com/tip-sheet/bill-gates-says-these-5-books-will-make-you-smarter.html',
        'md5': '7416739c9c16438c09fa35619d6ba5cb',
        'info_dict': {
            'title': 'Bill Gates Says These 5 Books Will Make You Smarter',
            'description': 'md5:bea7ff6cce100886fc1995acb743237e',
            'timestamp': 1474414430,
            'upload_date': '20160920',
            'uploader_id': 'video@inc.com',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # div with id=kaltura_player_1_kqs38cgm
        'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
        'info_dict': {
            'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
            'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
            'timestamp': 1364403232,
            'upload_date': '20130327',
            'uploader_id': 'incdigital@inc.com',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a ``url_result`` pointing ``KalturaIE`` at the page's video.

        Args:
            url: An article URL matching ``_VALID_URL``.

        Returns:
            The value of ``self.url_result`` for
            ``kaltura:<partner_id>:<kaltura_id>``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Fall back to a fixed partner id when the page does not declare one.
        partner_id = self._search_regex(
            r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
            'partner id', default='1034971')

        # First choice: the id suffix of the player element
        # (id="kaltura_player_<entry id>").
        kaltura_id = self._search_regex(
            r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
            default=None, group='id')
        if not kaltura_id:
            # Otherwise read it from the ``pageInfo.videos`` array embedded
            # in the page; this search has no default, so it is mandatory.
            videos_json = self._search_regex(
                r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id')
            kaltura_id = self._parse_json(
                videos_json, display_id)['vid_kaltura_id']

        return self.url_result(
            f'kaltura:{partner_id}:{kaltura_id}', KalturaIE.ie_key())