import functools
import uuid

from .common import InfoExtractor
# NOTE(review): the helper import below is reconstructed — the extracted source
# lost the lines between the InfoExtractor import and the first class, yet the
# extractors reference these names. Confirm `..utils` is the right module.
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    int_or_none,
    month_by_name,
    parse_duration,
    try_call,
)
class WyborczaVideoIE(InfoExtractor):
    # Extracts one wyborcza.pl video through the site's `/api-video/<id>` JSON
    # endpoint.
    # this id is not an article id, it has to be extracted from the article
    _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
    IE_NAME = 'wyborcza:video'
    _TESTS = [{
        'url': 'wyborcza:video:26207634',
        'info_dict': {
            # NOTE(review): the extracted source lost several short lines inside
            # this dict (its embedded numbering skips entries); restore any
            # further expected fields from the canonical file.
            'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
            'uploader': 'Dorota Roman',
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/video/26207634',
        'only_matching': True,
    }, {
        'url': 'https://wyborcza.pl/api-video/26207634',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video identified by *url*.

        Downloads the video's JSON metadata, adds one progressive format for
        each of the 'standard' and 'high' entries present in ``meta['files']``
        and, when a 'dash' entry exists, the formats of that MPD manifest.
        """
        video_id = self._match_id(url)
        meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)

        formats = []
        # The redirector is forced to https before the base path is appended.
        base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
        for quality in ('standard', 'high'):
            if not meta['files'].get(quality):
                continue
            formats.append({
                'url': base_url + meta['files'][quality],
                # The height is read from a `p<digits><letters>.mp4` filename
                # suffix; it stays None when the name does not match.
                'height': int_or_none(
                    self._search_regex(
                        r'p(\d+)[a-z]+\.mp4$', meta['files'][quality],
                        'mp4 video height', default=None)),
                # NOTE(review): reconstructed entry — one short line is missing
                # from the extracted source at this position; confirm.
                'format_id': quality,
            })
        if meta['files'].get('dash'):
            formats.extend(self._extract_mpd_formats(
                base_url + meta['files']['dash'], video_id))

        return {
            'id': video_id,
            'formats': formats,
            'title': meta.get('title'),
            'description': meta.get('lead'),
            'uploader': meta.get('signature'),
            'thumbnail': meta.get('imageUrl'),
            'duration': meta.get('duration'),
        }
class WyborczaPodcastIE(InfoExtractor):
    # Handles podcast pages on wyborcza.pl and wysokieobcasy.pl. A URL carrying
    # a `podcast=<id>` query parameter is a single episode; without it the page
    # is handed over to TokFMAuditionIE as a playlist.
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:
            wyborcza\.pl/podcast(?:/0,172673\.html)?|
            wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
        )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
    '''
    _TESTS = [{
        # NOTE(review): the extracted source lost several short lines inside
        # these info_dicts (its embedded numbering skips entries); restore any
        # further expected fields from the canonical file.
        'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
        'info_dict': {
            'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
            'uploader': 'Michał Nogaś ',
            'upload_date': '20210117',
            'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
        'info_dict': {
            'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
            'uploader': 'Agnieszka Urazińska ',
            'upload_date': '20210115',
            'description': 'md5:c161dc035f8dbb60077011fc41274899',
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/podcast',
        'info_dict': {
            'title': 'Gościnnie: Wyborcza, 8:10',
            'series': 'Gościnnie: Wyborcza, 8:10',
        },
        'playlist_mincount': 370,
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
        'info_dict': {
            'title': 'Gościnnie: Wysokie Obcasy',
            'series': 'Gościnnie: Wysokie Obcasy',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Return an episode info dict, or a playlist redirect for bare pages.

        When the URL has no ``podcast=<id>`` parameter, a fixed audition id is
        chosen by site ('395' for wysokieobcasy.pl, '334' otherwise) and the
        result points at TokFMAuditionIE.
        """
        podcast_id = self._match_id(url)

        if not podcast_id:  # playlist
            podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334'
            return self.url_result(
                TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id)

        meta = self._download_json(
            'https://wyborcza.pl/api/podcast', podcast_id,
            query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None})

        # publishedDate is matched as "<day> <month name> <year>"; all three
        # parts fall back to None when the field is absent or shaped otherwise.
        day, month, year = self._search_regex(
            r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
            'upload date', group=(1, 2, 3), default=(None, None, None))

        return {
            'id': podcast_id,
            # NOTE(review): reconstructed entry — the line carrying the media
            # URL is missing from the extracted source; `meta['url']` is the
            # presumed key, confirm against the API response.
            'url': meta['url'],
            'title': meta.get('title'),
            'description': meta.get('description'),
            'thumbnail': meta.get('imageUrl'),
            'duration': parse_duration(meta.get('duration')),
            'uploader': meta.get('author'),
            # try_call wraps the formatting, which cannot succeed when the
            # date parts above are None.
            'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'),
        }
class TokFMPodcastIE(InfoExtractor):
    # Extracts a single TOK FM podcast episode, addressed either by its
    # audycje.tokfm.pl URL or by the internal `tokfm:podcast:<id>` form.
    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:podcast'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
        'info_dict': {
            # NOTE(review): the extracted source lost several short lines inside
            # this dict (its embedded numbering skips entries); restore any
            # further expected fields from the canonical file.
            'title': 'md5:a9b15488009065556900169fb8061cce',
            'episode': 'md5:a9b15488009065556900169fb8061cce',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for one podcast episode.

        Raises:
            ExtractorError: when the metadata endpoint returns nothing for the id.
        """
        media_id = self._match_id(url)

        # in case it breaks see this but it returns a lot of useless data
        # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
        metadata = self._download_json(
            f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
        if not metadata:
            raise ExtractorError('No such podcast', expected=True)
        metadata = metadata[0]

        formats = []
        for ext in ('aac', 'mp3'):
            # A fresh random device_id is generated for every request.
            url_data = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
                media_id, f'Downloading podcast {ext} URL')
            # prevents inserting the mp3 (default) multiple times
            if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
                formats.append({
                    'url': url_data['link_ssl'],
                    # NOTE(review): the three entries below are reconstructed —
                    # three short lines are missing from the extracted source
                    # at this position; confirm.
                    'ext': ext,
                    'vcodec': 'none',
                    'acodec': ext,
                })

        return {
            'id': media_id,
            'formats': formats,
            'title': metadata.get('podcast_name'),
            'series': metadata.get('series_name'),
            'episode': metadata.get('podcast_name'),
        }
class TokFMAuditionIE(InfoExtractor):
    # Extracts a whole TOK FM audition (series) as a lazily paged playlist whose
    # entries are resolved by TokFMPodcastIE.
    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:audition'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
        # NOTE(review): the expected `info_dict` of this test consisted only of
        # short lines that are missing from the extracted source; restore it
        # from the canonical file.
        'playlist_count': 1635,
    }]

    # NOTE(review): reconstructed — the definition line is missing from the
    # extracted source; 30 matches the `limit=30` used by `_fetch_page`.
    _PAGE_SIZE = 30
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
    }

    # Called on the class itself by WyborczaPodcastIE and takes no `self`,
    # hence a static method.
    @staticmethod
    def _create_url(video_id):
        """Return the canonical audition page URL for *video_id*."""
        return f'https://audycje.tokfm.pl/audycja/{video_id}'

    def _real_extract(self, url):
        """Return a playlist info dict whose entries are fetched page by page.

        Raises:
            ExtractorError: when the series endpoint returns nothing for the id.
        """
        audition_id = self._match_id(url)

        data = self._download_json(
            f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
            audition_id, 'Downloading audition metadata', headers=self._HEADERS)
        if not data:
            raise ExtractorError('No such audition', expected=True)
        # NOTE(review): reconstructed — mirrors `metadata = metadata[0]` in
        # TokFMPodcastIE; `data.get(...)` below needs a mapping. Confirm that
        # the endpoint returns a list.
        data = data[0]

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, audition_id, data), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': audition_id,
            'title': data.get('series_name'),
            'series': data.get('series_name'),
            'entries': entries,
        }

    def _fetch_page(self, audition_id, data, page):
        """Yield url_transparent entries for one page of the audition.

        *page* is the zero-based page index supplied by OnDemandPagedList and
        is sent verbatim as the API's ``offset`` parameter. An empty response
        is reported to the retry manager as an error.
        """
        for retry in self.RetryManager():
            podcast_page = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
                audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
            if not podcast_page:
                retry.error = ExtractorError('Agora returned empty page', expected=True)

        for podcast in podcast_page:
            yield {
                '_type': 'url_transparent',
                'url': podcast['podcast_sharing_url'],
                'ie_key': TokFMPodcastIE.ie_key(),
                'title': podcast.get('podcast_name'),
                'episode': podcast.get('podcast_name'),
                'description': podcast.get('podcast_description'),
                'timestamp': int_or_none(podcast.get('podcast_timestamp')),
                'series': data.get('series_name'),
            }