[cleanup] Make more playlist entries lazy (#11763)
[yt-dlp.git] / yt_dlp / extractor / lsm.py
blob56c06d7458310abf81f51349869eff7ee433ab60
1 import re
2 import urllib.parse
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 determine_ext,
8 int_or_none,
9 js_to_json,
10 parse_iso8601,
11 parse_qs,
12 str_or_none,
13 url_or_none,
14 urljoin,
16 from ..utils.traversal import traverse_obj
class LSMLREmbedIE(InfoExtractor):
    """Extractor for the LSM radio embed/player pages matched by ``_VALID_URL``.

    A page can describe one or several media items (audio and/or video); a
    single item is returned directly, several are returned as a playlist.
    """

    _VALID_URL = r'''(?x)
        https?://(?:
            (?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
            pieci
        )\.lv/[^/?#]+/(?:
            pleijeris|embed
        )/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
    _TESTS = [{
        'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
        'md5': '719b33875cd1429846eeeaeec6df2830',
        'info_dict': {
            'id': 'a342781',
            'ext': 'mp3',
            'duration': 1823,
            'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
            'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
        },
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
        'info_dict': {
            'id': '1270',
        },
        'playlist_count': 3,
        'playlist': [{
            'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
            'info_dict': {
                'id': 'a297397',
                'ext': 'mp3',
                'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
                'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
                'duration': 3300,
            },
        }],
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
        'md5': '24810d4a961da2295d9860afdcaf4f5a',
        'info_dict': {
            'id': 'a230690',
            'ext': 'mp3',
            'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
            'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
            'duration': 1788,
        },
    }, {
        'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
        'info_dict': {
            'id': '166557',
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
            'info_dict': {
                'id': 'a303104',
                'ext': 'mp3',
                'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
                'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
                'duration': 3222,
            },
        }, {
            'md5': '5d5e191e718b7644e5118b7b4e093a6d',
            'info_dict': {
                'id': 'v303104',
                'ext': 'mp4',
                'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
                'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
                'duration': 3222,
            },
        }],
    }, {
        'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
        'only_matching': True,
    }]

    def _lr_item_formats(self, item, video_id):
        """Collect the formats of one media item from its ``sources[*].file`` URLs."""
        formats = []
        for source_url in traverse_obj(item, ('sources', ..., 'file', {url_or_none})):
            if determine_ext(source_url) != 'm3u8':
                # Anything that is not an HLS manifest is used as a direct URL
                formats.append({'url': source_url})
                continue
            formats.extend(self._extract_m3u8_formats(source_url, video_id, fatal=False))
        return formats

    @staticmethod
    def _lr_item_title(media_json, item_id, title):
        """Return ``title``, or a derived one for an untitled ``v``-prefixed item.

        The fallback is the title of the audio item whose id matches after the
        first character, suffixed with `` - Video Version``.
        """
        if not item_id.startswith('v') or title:
            return title
        return traverse_obj(
            media_json, ('audio', lambda _, v: v['id'][1:] == item_id[1:], 'title',
                         {lambda x: x and f'{x} - Video Version'}), get_all=False)

    def _real_extract(self, url):
        """Extract every audio/video item embedded in the player page at ``url``."""
        # The page id comes from the `show` or `id` query parameter; zero or
        # empty values are filtered out (see the `id=&show=1270` and
        # `id=166557&show=0` test URLs)
        video_id = traverse_obj(
            parse_qs(url),
            (('show', 'id'), 0, {int_or_none}, filter, {str_or_none}),
            get_all=False)
        webpage = self._download_webpage(url, video_id)

        raw_player, raw_media = self._search_regex(
            r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
            webpage, 'player json', group=('player', 'media'))

        # The player config only supplies the poster, so it is parsed non-fatally
        player_json = self._parse_json(
            raw_player, video_id, transform_source=js_to_json, fatal=False) or {}
        media_json = self._parse_json(raw_media, video_id, transform_source=js_to_json)

        entries = []
        for item in traverse_obj(media_json, (('audio', 'video'), lambda _, v: v['id'])):
            formats = self._lr_item_formats(item, video_id)
            item_id = item['id']
            title = self._lr_item_title(media_json, item_id, item.get('title'))
            entries.append({
                'formats': formats,
                'thumbnail': urljoin(url, player_json.get('poster')),
                'id': item_id,
                'title': title,
                'duration': traverse_obj(item, ('duration', {int_or_none})),
            })

        # A lone item is returned as a plain video result, not a playlist
        if len(entries) != 1:
            return self.playlist_result(entries, video_id)
        return entries[0]
class LSMLTVEmbedIE(InfoExtractor):
    """Extractor for ``ltv.lsm.lv/embed`` pages.

    The page's ``window.ltvEmbedPayload`` JSON names the embedded source;
    extraction is delegated to the ``CloudyCDN`` or ``Youtube`` extractor.
    """

    _VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        },
    }, {
        'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
        'md5': 'a1711e190fe680fdb68fd8413b378e87',
        'info_dict': {
            'id': 'wUnFArIPDSY',
            'ext': 'mp4',
            'uploader': 'LTV_16plus',
            'release_date': '20220514',
            'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
            'view_count': int,
            'availability': 'public',
            'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
            'release_timestamp': 1652544074,
            'title': 'EIROVĪZIJA SALĀTOS',
            'live_status': 'was_live',
            'uploader_id': '@LTV16plus',
            'comment_count': int,
            'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'duration': 5269,
            'upload_date': '20220514',
            'age_limit': 0,
            'channel': 'LTV_16plus',
            'playable_in_embed': True,
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@LTV16plus',
            'like_count': int,
            'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
        },
    }]

    def _real_extract(self, url):
        """Resolve the embed page at ``url`` to a result for the underlying extractor.

        Raises:
            ExtractorError: if the payload's source type is not ``telia`` or
                ``youtube``, or if no usable URL/id is present for it.
        """
        # The `c` query value is used as the video id after percent-decoding
        video_id = urllib.parse.unquote(self._match_id(url))
        webpage = self._download_webpage(url, video_id)
        data = self._search_json(
            r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
        embed_type = traverse_obj(data, ('source', 'name', {str}))

        if embed_type == 'telia':
            ie_key = 'CloudyCDN'
            embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
        elif embed_type == 'youtube':
            ie_key = 'Youtube'
            # For YouTube sources the bare video id is passed on as the URL
            embed_url = traverse_obj(data, ('source', 'id', {str}))
        else:
            raise ExtractorError(f'Unsupported embed type {embed_type!r}')

        # Fail here with a clear message instead of handing a missing URL to
        # url_result(), which would only surface as an obscure error later
        if not embed_url:
            raise ExtractorError(f'Unable to extract embed URL for {embed_type!r} source')

        return self.url_result(
            embed_url, ie_key, video_id, **traverse_obj(data, {
                'title': ('parentInfo', 'title'),
                'duration': ('parentInfo', 'duration', {int_or_none}),
                'thumbnail': ('source', 'poster', {url_or_none}),
            }))
class LSMReplayIE(InfoExtractor):
    """Extractor for ``replay.lsm.lv`` recording/article pages.

    Metadata is read from the page's ``__REPLAY__`` data and the playback
    service URL is returned as a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700586300,
            'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
            'duration': 1442,
            'upload_date': '20231121',
            'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
            'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
        },
    }, {
        'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
        'md5': '719b33875cd1429846eeeaeec6df2830',
        'info_dict': {
            'id': 'a342781',
            'ext': 'mp3',
            'duration': 1823,
            'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
            'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
            'upload_date': '20231102',
            'timestamp': 1698921060,
            'description': 'md5:7bac3b2dd41e44325032943251c357b1',
        },
    }, {
        'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'only_matching': True,
    }]

    def _fix_nuxt_data(self, webpage):
        """Replace ``Object.create(null[,{...}])`` calls in ``webpage``.

        ``Object.create(null,{...})`` becomes the ``{...}`` literal and a bare
        ``Object.create(null)`` becomes ``null``.
        """
        # NOTE(review): presumably needed so the data can be parsed by
        # _search_nuxt_data - confirm against that helper
        def _unwrap(match):
            return match.group(1) or 'null'

        return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', _unwrap, webpage)

    def _real_extract(self, url):
        """Build a ``url_transparent`` result from the page's ``__REPLAY__`` data."""
        video_id = self._match_id(url)
        fixed_webpage = self._fix_nuxt_data(self._download_webpage(url, video_id))
        data = self._search_nuxt_data(fixed_webpage, video_id, context_name='__REPLAY__')

        # get_all=False keeps only the first match for branching paths such
        # as the description ('lead', then 'body')
        metadata = traverse_obj(data, {
            'url': ('playback', 'service', 'url', {url_or_none}),
            'title': ('mediaItem', 'title'),
            'description': ('mediaItem', ('lead', 'body')),
            'duration': ('mediaItem', 'duration', {int_or_none}),
            'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
            'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
        }, get_all=False)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            **metadata,
        }