[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / walla.py
blob442a9bcf9fc1dd6822611ce8e78c4798a798acce
1 import re
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 xpath_text,
class WallaIE(InfoExtractor):
    """Extractor for video pages on vod.walla.co.il.

    Metadata, subtitles and RTMP formats are read from the XML playlist
    served by video2.walla.co.il for the numeric video id found in the URL.
    """

    _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
    _TEST = {
        'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
        'info_dict': {
            'id': '2642630',
            'display_id': 'one-direction-all-for-one',
            'ext': 'flv',
            'title': 'וואן דיירקשן: ההיסטריה',
            'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3600,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    # Maps the subtitle title found in the XML to a language code.
    _SUBTITLE_LANGS = {
        'עברית': 'heb',
    }

    def _real_extract(self, url):
        """Build the info dict for a single Walla VOD page.

        Args:
            url: Page URL matching ``_VALID_URL``.

        Returns:
            A dict with ``id``, ``display_id``, ``title``, ``description``,
            ``thumbnail``, ``duration``, ``formats`` and ``subtitles``.

        Raises:
            ExtractorError: If the playlist XML contains no ``items/item``
                element to read the metadata from.
        """
        # Imported here so this block does not depend on edits to the
        # module-level import list.
        from ..utils import ExtractorError

        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        video = self._download_xml(
            f'http://video2.walla.co.il/?w=null/null/{video_id}/@@/video/flv_pl',
            display_id)

        item = video.find('./items/item')
        if item is None:
            # Without this guard every lookup below would fail with an
            # AttributeError on None instead of a readable extractor error.
            raise ExtractorError(
                'Unable to find video item in playlist XML', video_id=video_id)

        title = xpath_text(item, './title', 'title')
        description = xpath_text(item, './synopsis', 'description')
        thumbnail = xpath_text(item, './preview_pic', 'thumbnail')
        duration = int_or_none(xpath_text(item, './duration', 'duration'))

        subtitles = {}
        for subtitle in item.findall('./subtitles/subtitle'):
            sub_url = xpath_text(subtitle, './src')
            if not sub_url:
                # A subtitle entry without a source cannot be downloaded.
                continue
            lang = xpath_text(subtitle, './title')
            # Untitled tracks are filed under 'und' rather than a None key;
            # appending keeps several tracks of one language from
            # overwriting each other.
            lang_code = self._SUBTITLE_LANGS.get(lang, lang) or 'und'
            subtitles.setdefault(lang_code, []).append({
                'ext': 'srt',
                'url': sub_url,
            })

        formats = []
        for quality in item.findall('./qualities/quality'):
            play_path = xpath_text(quality, './src')
            if not play_path:
                # An RTMP format without a play path is not downloadable.
                continue
            format_id = xpath_text(quality, './title')
            fmt = {
                'url': 'rtmp://wafla.walla.co.il/vod',
                'play_path': play_path,
                'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
                'page_url': url,
                'ext': 'flv',
                'format_id': format_id,
            }
            # The quality title may start with the height (e.g. "720p");
            # fall back to '' so a missing title does not raise TypeError.
            m = re.search(r'^(?P<height>\d+)[Pp]', format_id or '')
            if m:
                fmt['height'] = int(m.group('height'))
            formats.append(fmt)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }