[cleanup] Make more playlist entries lazy (#11763)
[yt-dlp.git] / yt_dlp / extractor / xstream.py
blob f7b48322cdf66f69c6cd8759afedaaf50fde3b19
1 import re
3 from .common import InfoExtractor
4 from ..utils import (
5 find_xpath_attr,
6 int_or_none,
7 parse_iso8601,
8 xpath_text,
9 xpath_with_ns,
class XstreamIE(InfoExtractor):
    """Extract a single video from the xstream frontend Atom feed.

    The feed is fetched from
    ``http://frontend.xstream.dk/<partner_id>/feed/video/?platform=web&id=<id>``
    and its first ``atom:entry`` is turned into an info dict.
    """

    # NOTE(review): the closing parentheses and the bare ``:`` separator
    # alternative in the second group are assumed; confirm this pattern
    # against the canonical extractor before relying on it.
    _VALID_URL = r'''(?x)
                    (?:
                        xstream:|
                        https?://frontend\.xstream\.(?:dk|net)/
                    )
                    (?P<partner_id>[^/]+)
                    (?:
                        :|
                        /feed/video/\?.*?\bid=
                    )
                    (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }, {
        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
        'only_matching': True,
    }]

    def _extract_video_info(self, partner_id, video_id):
        """Download the Atom feed for one video and build its info dict.

        Args:
            partner_id: Path segment identifying the partner in the feed URL.
            video_id: Value of the feed's ``id`` query parameter; also used
                as the ``id`` of the returned dict.

        Returns:
            A dict with the keys ``id``, ``title``, ``description``,
            ``timestamp``, ``formats`` and ``thumbnails``.
        """
        data = self._download_xml(
            f'http://frontend.xstream.dk/{partner_id}/feed/video/?platform=web&id={video_id}',
            video_id)

        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        # An entry without <media:group> simply contributes no media or
        # splash nodes; calling findall() on None would raise instead.
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        if media_group is None:
            media_contents = []
            splashes = []
        else:
            media_contents = media_group.findall(
                xpath_with_ns('./media:content', NS_MAP))
            splashes = media_group.findall(
                xpath_with_ns('./xt:splash', NS_MAP))

        formats = []
        for media_content in media_contents:
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                play_path = mobj.group('playpath')
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': f'mp4:{play_path}',
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # tbr is None when the bitrate attribute is absent or
                    # not numeric; '%d' % None would raise TypeError.
                    'format_id': 'rtmp' if tbr is None else f'rtmp-{tbr}',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })

        # The rel="original" link, when present, is added as an extra format
        # with an explicit quality of 1.
        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        # Skip a link that carries no href rather than emit a URL-less format.
        if link is not None and link.get('href'):
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
                'quality': 1,
            })

        # Splash nodes without a url attribute are dropped for the same reason.
        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in splashes if splash.get('url')]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }

    def _real_extract(self, url):
        """Split *url* into partner id and video id, then extract the video."""
        mobj = self._match_valid_url(url)
        partner_id = mobj.group('partner_id')
        video_id = mobj.group('id')

        return self._extract_video_info(partner_id, video_id)