1 from .common
import InfoExtractor
class RottenTomatoesIE(InfoExtractor):
    """Extractor for Rotten Tomatoes movie pages and their trailers.

    A URL matches one of three shapes (see ``_VALID_URL``):

    * ``/m/<movie>``                  -> playlist of the movie's videos
    * ``/m/<movie>/trailers``         -> playlist of the movie's trailers
    * ``/m/<movie>/trailers/<id>``    -> a single trailer
    """

    _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/(?P<playlist>[^/]+)(?:/(?P<tr>trailers)(?:/(?P<id>\w+))?)?'

    # NOTE(review): the copy of this file under review had lost several lines
    # of this table (the list opener, the nested expectation dicts and some
    # expected fields). The nesting below follows the usual
    # `'info_dict': {...}` layout and only carries the expectations that were
    # still present; restore any missing ones (ids, ext, duration, ...) from
    # version control.
    _TESTS = [{
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        'info_dict': {
            'title': 'Toy Story 3',
            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
        },
        'skip': 'No longer available',
    }, {
        'url': 'https://www.rottentomatoes.com/m/toy_story_3/trailers/VycaVoBKhGuk',
        'info_dict': {
            'title': 'Toy Story 3: Trailer 2',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3',
        'info_dict': {
            'title': 'Toy Story 3',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers',
        'info_dict': {
            'id': 'toy_story_3-trailers',
        },
        'playlist_mincount': 5,
    }]

    def _extract_videos(self, data, display_id):
        """Yield one info dict per HLS video entry found in ``data``.

        Only entries with a truthy ``publicId``, a truthy ``file`` and
        ``type == 'hls'`` are considered. ``display_id`` is forwarded to the
        m3u8 format helper.
        """
        hls_entries = traverse_obj(
            data, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls'))
        for video in hls_entries:
            yield {
                'formats': self._extract_m3u8_formats(
                    video['file'], display_id, 'mp4', m3u8_id='hls', fatal=False),
                **traverse_obj(video, {
                    # NOTE(review): the 'id' and 'title' mappings were missing
                    # from the damaged copy and are reconstructed here (the
                    # lookup in _real_extract keys on 'publicId', and the
                    # tests expect a title) -- confirm against history.
                    'id': 'publicId',
                    'title': 'title',
                    'description': 'description',
                    'duration': ('durationInSeconds', {float_or_none}),
                    'thumbnail': ('image', {url_or_none}),
                }),
            }

    def _real_extract(self, url):
        """Return a single video result or a playlist result for ``url``.

        When the URL carries a trailer id, the entry whose ``publicId`` equals
        that id is extracted and returned on its own; otherwise every
        extractable entry is returned as a playlist.

        Raises:
            ExtractorError: if a trailer id was requested but no entry in the
                page data has a matching ``publicId``.
        """
        playlist_id, trailers, video_id = self._match_valid_url(url).group('playlist', 'tr', 'id')
        # '<movie>' alone, or '<movie>-trailers' when the /trailers segment is
        # present (cf. the 'toy_story_3-trailers' test id).
        playlist_id = join_nonempty(playlist_id, trailers)
        webpage = self._download_webpage(url, playlist_id)
        # The video list is a JSON array embedded in a <script> element whose
        # id is either "heroVideos" or "videos".
        data = self._search_json(
            r'<script[^>]+\bid=["\'](?:heroV|v)ideos["\'][^>]*>', webpage,
            'data', playlist_id, contains_pattern=r'\[{(?s:.+)}\]')

        if video_id:
            video_data = traverse_obj(data, lambda _, v: v['publicId'] == video_id)
            if not video_data:
                raise ExtractorError('Unable to extract video from webpage')
            return next(self._extract_videos(video_data, video_id))

        return self.playlist_result(
            self._extract_videos(data, playlist_id), playlist_id,
            clean_html(get_element_by_class('scoreboard__title', webpage)))