[cleanup] Make more playlist entries lazy (#11763)
[yt-dlp.git] / yt_dlp / extractor / spreaker.py
blobc64c2fcd2ea0b79706f0e7e195e033d88306f44e
1 import itertools
3 from .common import InfoExtractor
4 from ..utils import (
5 filter_dict,
6 float_or_none,
7 int_or_none,
8 parse_qs,
9 str_or_none,
10 try_get,
11 unified_timestamp,
12 url_or_none,
14 from ..utils.traversal import traverse_obj
def _extract_episode(data, episode_id=None):
    """Convert one episode object from the Spreaker API into an info dict.

    `data` must provide 'title' and 'download_url'; if `episode_id` is falsy,
    `data['episode_id']` is required too (a missing required key raises
    KeyError). Every other field is looked up leniently and may end up None.
    """

    def counter(kind):
        # Try the flat '<kind>s_count' key first, then the nested 'stats' mapping
        return int_or_none(try_get(data, (
            lambda x: x[f'{kind}s_count'],
            lambda x: x['stats'][f'{kind}s'],
        )))

    def seconds(field):
        # The raw value is divided by 1000 (presumably milliseconds - confirm against the API)
        return float_or_none(data.get(field), scale=1000)

    # Mandatory fields are read first so that a malformed episode fails early
    title, media_url = data['title'], data['download_url']

    show_title = try_get(data, lambda x: x['show']['title'], str)
    author_name = try_get(data, lambda x: x['author']['fullname'], str)

    candidate_urls = (
        url_or_none(data.get(f'{prefix}_url'))
        for prefix in ('image_original', 'image_medium', 'image'))
    thumbnails = [{'url': candidate} for candidate in candidate_urls if candidate]

    return {
        'id': str(episode_id or data['episode_id']),
        'url': media_url,
        'display_id': data.get('permalink'),
        'title': title,
        'description': data.get('description'),
        'timestamp': unified_timestamp(data.get('published_at')),
        'uploader': author_name,
        'uploader_id': str_or_none(data.get('author_id')),
        'creator': author_name,
        # Fall back to 'length' when 'duration' is absent or zero
        'duration': seconds('duration') or seconds('length'),
        'view_count': counter('play'),
        'like_count': counter('like'),
        'comment_count': counter('message'),
        'format': 'MPEG Layer 3',
        'format_id': 'mp3',
        'container': 'mp3',
        'ext': 'mp3',
        'thumbnails': thumbnails,
        'series': show_title,
        'extractor_key': SpreakerIE.ie_key(),
    }
class SpreakerIE(InfoExtractor):
    # Single-episode extractor: matches API episode/download URLs as well as
    # website episode pages whose path ends in the numeric episode id.
    _VALID_URL = [
        r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P<id>\d+)',
        r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P<id>\d+)/?(?:[?#]|$)',
    ]
    _TESTS = [{
        'url': 'https://api.spreaker.com/episode/12534508',
        'info_dict': {
            'id': '12534508',
            'display_id': 'swm-ep15-how-to-market-your-music-part-2',
            'ext': 'mp3',
            'title': 'EP:15 | Music Marketing (Likes) - Part 2',
            'description': 'md5:0588c43e27be46423e183076fa071177',
            'timestamp': 1502250336,
            'upload_date': '20170809',
            'uploader': 'SWM',
            'uploader_id': '9780658',
            'duration': 1063.42,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': 'Success With Music | SWM',
            'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg',
            'creators': ['SWM'],
        },
    }, {
        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
        'only_matching': True,
    }, {
        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
        'only_matching': True,
    }, {
        'note': 'episode',
        'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615',
        'info_dict': {
            'id': '60269615',
            'display_id': 'grunge-music-origins-the-raw-sound-that-',
            'ext': 'mp3',
            'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation',
            'description': str,
            'timestamp': 1717468905,
            'upload_date': '20240604',
            'uploader': 'Katie Brown 2',
            'uploader_id': '17733249',
            'duration': 818.83,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': '90s Grunge',
            'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg',
            'creators': ['Katie Brown 2'],
        },
    }, {
        'url': 'https://www.spreaker.com/episode/60269615',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the episode JSON from the v2 API and convert it to an info dict."""
        episode_id = self._match_id(url)
        # Pass along the first `key` query parameter of the input URL, if there is one
        api_query = traverse_obj(parse_qs(url), {'key': ('key', 0)})
        api_response = self._download_json(
            f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id,
            query=api_query)
        episode = api_response['response']['episode']
        return _extract_episode(episode, episode_id)
class SpreakerShowIE(InfoExtractor):
    # Playlist extractor for a whole show: matches API show URLs, website
    # podcast pages ending in `--<id>`, and show episode feed URLs.
    _VALID_URL = [
        r'https?://api\.spreaker\.com/show/(?P<id>\d+)',
        r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P<id>[\d]+)',
        r'https?://(?:www\.)?spreaker\.com/show/(?P<id>\d+)/episodes/feed',
    ]
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.spreaker.com/podcast/health-wealth--5918323',
        'info_dict': {
            'id': '5918323',
        },
        'playlist_mincount': 60,
    }, {
        'url': 'https://www.spreaker.com/show/5887186/episodes/feed',
        'info_dict': {
            'id': '5887186',
        },
        'playlist_mincount': 290,
    }]

    def _entries(self, show_id, key=None):
        """Lazily yield an info dict for every episode of the show.

        Pages are requested one at a time (with `max_per_page` set to 100)
        starting at page 1. Iteration ends when a response has no usable
        pager, the pager has no non-empty result list, or the pager reports
        the current page as its last page.
        """
        for page_num in itertools.count(1):
            # filter_dict drops the `key` entry when no key was supplied
            request_query = filter_dict({
                'page': page_num,
                'max_per_page': 100,
                'key': key,
            })
            page = self._download_json(
                f'https://api.spreaker.com/show/{show_id}/episodes',
                show_id, note=f'Downloading JSON page {page_num}', query=request_query)

            pager = try_get(page, lambda x: x['response']['pager'], dict)
            results = pager.get('results') if pager else None
            if not results or not isinstance(results, list):
                return

            for item in results:
                # Silently skip anything that is not an episode object
                if isinstance(item, dict):
                    yield _extract_episode(item)

            if page_num == pager.get('last_page'):
                return

    def _real_extract(self, url):
        """Return a playlist whose entries are fetched lazily from the show API."""
        show_id = self._match_id(url)
        # Optional `key` query parameter of the input URL, forwarded to each page request
        api_key = traverse_obj(parse_qs(url), ('key', 0))
        entries = self._entries(show_id, api_key)
        return self.playlist_result(entries, playlist_id=show_id)