4 from .common
import InfoExtractor
11 class SlideshareIE(InfoExtractor
):
12 _VALID_URL
= r
'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
15 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
19 'title': 'Managing Scale and Complexity',
20 'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
24 def _real_extract(self
, url
):
25 mobj
= self
._match
_valid
_url
(url
)
26 page_title
= mobj
.group('title')
27 webpage
= self
._download
_webpage
(url
, page_title
)
28 slideshare_obj
= self
._search
_regex
(
29 r
'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);',
30 webpage
, 'slideshare object')
31 info
= json
.loads(slideshare_obj
)
32 if info
['slideshow']['type'] != 'video':
33 raise ExtractorError('Webpage type is "{}": only video extraction is supported for Slideshare'.format(info
['slideshow']['type']), expected
=True)
36 bucket
= info
['jsplayer']['video_bucket']
37 ext
= info
['jsplayer']['video_extension']
38 video_url
= urllib
.parse
.urljoin(bucket
, doc
+ '-SD.' + ext
)
39 description
= get_element_by_id('slideshow-description-paragraph', webpage
) or self
._html
_search
_regex
(
40 r
'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage
,
41 'description', fatal
=False)
45 'id': info
['slideshow']['id'],
46 'title': info
['slideshow']['title'],
49 'thumbnail': info
['slideshow']['pin_image_url'],
50 'description': description
.strip() if description
else None,