3 from .common
import InfoExtractor
4 from ..utils
import float_or_none
, int_or_none
, parse_iso8601
, url_or_none
5 from ..utils
.traversal
import traverse_obj
8 class Art19IE(InfoExtractor
):
9 _UUID_REGEX
= r
'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
11 rf
'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
12 rf
'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
14 _EMBED_REGEX
= [rf
'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
17 'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
19 'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
21 'title': 'Why Did DeSantis Drop Out?',
22 'series': 'The Daily Briefing',
23 'release_timestamp': 1705941275,
24 'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
25 'episode': 'Episode 582',
26 'thumbnail': r
're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
27 'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
28 'upload_date': '20240122',
29 'timestamp': 1705940815,
30 'episode_number': 582,
31 'modified_date': '20240122',
32 'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
33 'modified_timestamp': 1705941275,
34 'release_date': '20240122',
38 'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
40 'id': '8319b776-4153-4d22-8630-631f204a03dd',
42 'title': 'Martha Stewart: The Homemaker Hustler Part 2',
43 'modified_date': '20240116',
44 'upload_date': '20240105',
45 'modified_timestamp': 1705435802,
46 'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
47 'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
48 'thumbnail': r
're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
49 'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
50 'release_timestamp': 1705305660,
51 'release_date': '20240115',
52 'timestamp': 1704481536,
54 'series': 'Scamfluencers',
55 'duration': 2588.37501,
56 'episode': 'Episode 88',
60 'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
62 'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
64 'title': "'Verstappen wordt een synoniem voor Formule 1'",
65 'season': 'Seizoen 6',
66 'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
67 'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
68 'duration': 3061.82111,
69 'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
70 'release_date': '20231126',
71 'modified_timestamp': 1701156004,
72 'thumbnail': r
're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
75 'modified_date': '20231128',
76 'upload_date': '20231126',
77 'timestamp': 1701025981,
78 'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
79 'series': 'De Boordradio',
80 'release_timestamp': 1701026308,
81 'episode': 'Episode 52',
84 'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
86 'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
88 'title': 'Larry Bucshon announces retirement from congress',
89 'upload_date': '20240115',
90 'episode_number': 148,
91 'episode': 'Episode 148',
92 'thumbnail': r
're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
93 'release_date': '20240115',
94 'timestamp': 1705328205,
95 'release_timestamp': 1705329275,
96 'series': 'All INdiana Politics',
97 'modified_date': '20240117',
98 'modified_timestamp': 1705458901,
99 'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
100 'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
101 'description': 'md5:53b5239e4d14973a87125c217c255b2a',
102 'duration': 1256.18848,
def _extract_embed_urls(cls, url, webpage):
    """Yield episode URLs for art19 players embedded in *webpage*.

    Everything the parent implementation yields is passed through first.
    After that, every ``<div>`` whose class contains ``art19-web-player``
    and which carries a ``data-episode-id`` attribute is turned into the
    corresponding ``rss.art19.com`` ``.mp3`` URL.
    """
    yield from super()._extract_embed_urls(url, webpage)

    # The single capture group is the episode UUID, so findall() returns
    # a plain list of UUID strings.
    player_div_re = rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]'
    yield from (
        f'https://rss.art19.com/episodes/{episode_uuid}.mp3'
        for episode_uuid in re.findall(player_div_re, webpage))
113 def _real_extract(self
, url
):
114 episode_id
= self
._match
_id
(url
)
116 player_metadata
= self
._download
_json
(
117 f
'https://art19.com/episodes/{episode_id}', episode_id
,
118 note
='Downloading player metadata', fatal
=False,
119 headers
={'Accept': 'application/vnd.art19.v0+json'})
120 rss_metadata
= self
._download
_json
(
121 f
'https://rss.art19.com/episodes/{episode_id}.json', episode_id
, fatal
=False,
122 note
='Downloading RSS metadata')
125 'format_id': 'direct',
126 'url': f
'https://rss.art19.com/episodes/{episode_id}.mp3',
130 for fmt_id
, fmt_data
in traverse_obj(rss_metadata
, ('content', 'media', {dict.items
}, ...)):
131 if fmt_id
== 'waveform_bin':
133 fmt_url
= traverse_obj(fmt_data
, ('url', {url_or_none}
))
141 'quality': -2 if fmt_id
== 'ogg' else -1,
147 **traverse_obj(player_metadata
, ('episode', {
148 'title': ('title', {str}
),
149 'description': ('description_plain', {str}
),
150 'episode_id': ('id', {str}
),
151 'episode_number': ('episode_number', {int_or_none}
),
152 'season_id': ('season_id', {str}
),
153 'series_id': ('series_id', {str}
),
154 'timestamp': ('created_at', {parse_iso8601}
),
155 'release_timestamp': ('released_at', {parse_iso8601}
),
156 'modified_timestamp': ('updated_at', {parse_iso8601}
),
158 **traverse_obj(rss_metadata
, ('content', {
159 'title': ('episode_title', {str}
),
160 'description': ('episode_description_plain', {str}
),
161 'episode_id': ('episode_id', {str}
),
162 'episode_number': ('episode_number', {int_or_none}
),
163 'season': ('season_title', {str}
),
164 'season_id': ('season_id', {str}
),
165 'season_number': ('season_number', {int_or_none}
),
166 'series': ('series_title', {str}
),
167 'series_id': ('series_id', {str}
),
168 'thumbnail': ('cover_image', {url_or_none}
),
169 'duration': ('duration', {float_or_none}
),
174 class Art19ShowIE(InfoExtractor
):
175 _VALID_URL_BASE
= r
'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
177 rf
'{_VALID_URL_BASE}(?:$|[#?])',
178 r
'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
180 _EMBED_REGEX
= [rf
'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
183 'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
186 'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
187 'display_id': 'echt-gebeurd',
188 'title': 'Echt Gebeurd',
189 'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
190 'timestamp': 1492642167,
191 'upload_date': '20170419',
192 'modified_timestamp': int,
193 'modified_date': str,
196 'playlist_mincount': 425,
198 'url': 'https://www.art19.com/shows/echt-gebeurd',
201 'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
202 'display_id': 'echt-gebeurd',
203 'title': 'Echt Gebeurd',
204 'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
205 'timestamp': 1492642167,
206 'upload_date': '20170419',
207 'modified_timestamp': int,
208 'modified_date': str,
211 'playlist_mincount': 425,
213 'url': 'https://rss.art19.com/scamfluencers',
216 'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
217 'display_id': 'scamfluencers',
218 'title': 'Scamfluencers',
219 'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
220 'timestamp': 1647368573,
221 'upload_date': '20220315',
222 'modified_timestamp': int,
223 'modified_date': str,
226 'playlist_mincount': 90,
228 'url': 'https://art19.com/shows/enthuellt/embed',
231 'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
232 'display_id': 'enthuellt',
233 'title': 'Enthüllt',
234 'description': 'md5:17752246643414a2fd51744fc9a1c08e',
235 'timestamp': 1601645860,
236 'upload_date': '20201002',
237 'modified_timestamp': int,
238 'modified_date': str,
241 'playlist_mincount': 10,
244 'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
247 'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
248 'display_id': 'deconstructing-yourself',
249 'title': 'Deconstructing Yourself',
250 'description': 'md5:dab5082b28b248a35476abf64768854d',
251 'timestamp': 1570581181,
252 'upload_date': '20191009',
253 'modified_timestamp': int,
254 'modified_date': str,
257 'playlist_mincount': 80,
259 'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
262 'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
263 'display_id': 'the-ben-joravsky-show',
264 'title': 'The Ben Joravsky Show',
265 'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
266 'timestamp': 1550875095,
267 'upload_date': '20190222',
268 'modified_timestamp': int,
269 'modified_date': str,
270 'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
272 'playlist_mincount': 1900,
def _extract_embed_urls(cls, url, webpage):
    """Yield show URLs for art19 players embedded in *webpage*.

    Everything the parent implementation yields is passed through first.
    After that, every ``<div>`` whose class contains ``art19-web-player``
    and which carries a ``data-series-id`` attribute is turned into the
    corresponding ``art19.com/shows/...`` URL.
    """
    yield from super()._extract_embed_urls(url, webpage)

    # The single capture group is the series identifier, so findall()
    # returns a plain list of identifier strings.
    player_div_re = r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]'
    yield from (
        f'https://art19.com/shows/{show_ref}'
        for show_ref in re.findall(player_div_re, webpage))
282 def _real_extract(self, url):
283 series_id = self._match_id(url)
284 series_metadata = self._download_json(
285 f'https
://art19
.com
/series
/{series_id}
', series_id, note='Downloading series metadata
',
286 headers={'Accept
': 'application
/vnd
.art19
.v0
+json
'})
291 self.url_result(f'https
://rss
.art19
.com
/episodes
/{episode_id}
.mp3
', Art19IE)
292 for episode_id in traverse_obj(series_metadata, ('series
', 'episode_ids
', ..., {str}))
294 **traverse_obj(series_metadata, ('series
', {
296 'display_id
': ('slug
', {str}),
297 'title
': ('title
', {str}),
298 'description
': ('description_plain
', {str}),
299 'timestamp
': ('created_at
', {parse_iso8601}),
300 'modified_timestamp
': ('updated_at
', {parse_iso8601}),
302 'tags
': traverse_obj(series_metadata, ('tags
', ..., 'name
', {str})),