5 from .common
import InfoExtractor
6 from .sproutvideo
import VidsIoIE
7 from .vimeo
import VimeoIE
8 from ..networking
.exceptions
import HTTPError
22 from ..utils
.traversal
import traverse_obj
, value
class PatreonBaseIE(InfoExtractor):
    """Shared helpers for the Patreon extractors: User-Agent selection and API access."""

    @functools.cached_property
    def patreon_user_agent(self):
        """User-Agent string to send to patreon.com (computed once per instance).

        Returns the newer app UA when a ``session_id`` cookie exists for
        https://www.patreon.com/, and the older app UA otherwise.
        """
        # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
        # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
        if self._get_cookies('https://www.patreon.com/').get('session_id'):
            return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
        return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'

    def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
        """Request ``https://www.patreon.com/api/{ep}`` through ``_download_json``.

        @param ep        Endpoint path appended to the API base URL
        @param item_id   ID handed to the download/parse helpers
        @param query     Optional dict of query parameters. When non-empty, a copy
                         with 'json-api-version' added is sent; the caller's dict
                         is not modified
        @param headers   Optional dict of request headers. 'User-Agent' defaults to
                         ``patreon_user_agent`` when the key is absent; the caller's
                         dict is not modified
        @param fatal     Forwarded to ``_download_json``
        @param note      Forwarded to ``_download_json``; falls back to
                         'Downloading API JSON' when falsy
        @returns         Whatever ``_download_json`` returns
        @raises ExtractorError  'Patreon said: <detail>' (expected=True) when the
                         failure is an HTTP error with a JSON body carrying an
                         ``errors[].detail``; any other failure is re-raised as is
        """
        # Both arguments default to None, so normalise before touching them
        headers = dict(headers) if headers else {}
        if 'User-Agent' not in headers:
            headers['User-Agent'] = self.patreon_user_agent
        if query:
            query = {**query, 'json-api-version': 1.0}

        try:
            return self._download_json(
                f'https://www.patreon.com/api/{ep}',
                item_id, note=note or 'Downloading API JSON',
                query=query, fatal=fatal, headers=headers)
        except ExtractorError as e:
            # Only an HTTP error response with a JSON body can carry an API error message
            is_json_http_error = (
                isinstance(e.cause, HTTPError)
                and mimetype2ext(e.cause.response.headers.get('Content-Type')) == 'json')
            if not is_json_http_error:
                raise
            err_json = self._parse_json(
                self._webpage_read_content(e.cause.response, None, item_id), item_id, fatal=False)
            err_message = traverse_obj(err_json, ('errors', ..., 'detail'), get_all=False)
            if err_message:
                raise ExtractorError(f'Patreon said: {err_message}', expected=True)
            # No usable detail in the body: surface the original error unchanged
            raise
57 class PatreonIE(PatreonBaseIE
):
59 _VALID_URL
= r
'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
61 'url': 'http://www.patreon.com/creation?hid=743933',
62 'md5': 'e25505eec1053a6e6813b8ed369875cc',
66 'title': 'Episode 166: David Smalley of Dogma Debate',
67 'description': 'md5:34d207dd29aa90e24f1b3f58841b81c7',
68 'uploader': 'Cognitive Dissonance Podcast',
69 'thumbnail': 're:^https?://.*$',
70 'timestamp': 1406473987,
71 'upload_date': '20140727',
72 'uploader_id': '87145',
75 'uploader_url': 'https://www.patreon.com/dissonancepod',
76 'channel_id': '80642',
77 'channel_url': 'https://www.patreon.com/dissonancepod',
78 'channel_follower_count': int,
81 'url': 'http://www.patreon.com/creation?hid=754133',
82 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
86 'title': 'CD 167 Extra',
87 'uploader': 'Cognitive Dissonance Podcast',
88 'thumbnail': 're:^https?://.*$',
91 'uploader_url': 'https://www.patreon.com/dissonancepod',
93 'skip': 'Patron-only content',
95 'url': 'https://www.patreon.com/creation?hid=1682498',
99 'title': 'I\'m on Patreon!',
100 'uploader': 'TraciJHines',
101 'thumbnail': 're:^https?://.*$',
102 'upload_date': '20150211',
103 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
104 'uploader_id': '@TraciHinesMusic',
105 'categories': ['Entertainment'],
110 'channel': 'TraciJHines',
111 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
112 'live_status': 'not_live',
114 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
115 'availability': 'public',
116 'channel_follower_count': int,
117 'playable_in_embed': True,
118 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
119 'comment_count': int,
120 'channel_is_verified': True,
121 'chapters': 'count:4',
122 'timestamp': 1423689666,
126 'skip_download': True,
129 'url': 'https://www.patreon.com/posts/episode-166-of-743933',
130 'only_matching': True,
132 'url': 'https://www.patreon.com/posts/743933',
133 'only_matching': True,
135 'url': 'https://www.patreon.com/posts/kitchen-as-seen-51706779',
136 'md5': '96656690071f6d64895866008484251b',
140 'title': 'KITCHEN AS SEEN ON DEEZ NUTS EXTENDED!',
141 'uploader': 'Cold Ones',
142 'thumbnail': 're:^https?://.*$',
143 'upload_date': '20210526',
144 'description': 'md5:557a409bd79d3898689419094934ba79',
145 'uploader_id': '14936315',
147 'skip': 'Patron-only content',
149 # m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277)
150 'url': 'https://www.patreon.com/posts/video-sketchbook-32452882',
154 'comment_count': int,
155 'uploader_id': '4301314',
157 'timestamp': 1576696962,
158 'upload_date': '20191218',
159 'thumbnail': r
're:^https?://.*$',
160 'uploader_url': 'https://www.patreon.com/loish',
161 'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
162 'title': 'VIDEO // sketchbook flipthrough',
163 'uploader': 'Loish ',
164 'tags': ['sketchbook', 'video'],
165 'channel_id': '1641751',
166 'channel_url': 'https://www.patreon.com/loish',
167 'channel_follower_count': int,
170 # bad videos under media (if media is included). Real one is under post_file
171 'url': 'https://www.patreon.com/posts/premium-access-70282931',
175 'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
176 'channel_url': 'https://www.patreon.com/thenormies',
177 'channel_id': '573397',
178 'uploader_id': '2929435',
179 'uploader': 'The Normies',
180 'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
181 'comment_count': int,
182 'upload_date': '20220809',
183 'thumbnail': r
're:^https?://.*$',
184 'channel_follower_count': int,
186 'timestamp': 1660052820,
187 'tags': ['The Office', 'early access', 'uncut'],
188 'uploader_url': 'https://www.patreon.com/thenormies',
190 'skip': 'Patron-only content',
192 # dead vimeo and embed URLs, need to extract post_file
193 'url': 'https://www.patreon.com/posts/hunter-x-hunter-34007913',
197 'title': 'Hunter x Hunter | Kurapika DESTROYS Uvogin!!!',
199 'uploader': 'YaBoyRoshi',
200 'timestamp': 1581636833,
201 'channel_url': 'https://www.patreon.com/yaboyroshi',
202 'thumbnail': r
're:^https?://.*$',
203 'tags': ['Hunter x Hunter'],
204 'uploader_id': '14264111',
205 'comment_count': int,
206 'channel_follower_count': int,
207 'description': 'Kurapika is a walking cheat code!',
208 'upload_date': '20200213',
209 'channel_id': '2147162',
210 'uploader_url': 'https://www.patreon.com/yaboyroshi',
213 # NSFW vimeo embed URL
214 'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
218 'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
219 'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
220 'uploader': 'Npickyeonhwa',
221 'uploader_id': '90574422',
222 'uploader_url': 'https://www.patreon.com/Yeonhwa726',
223 'channel_id': '10237902',
224 'channel_url': 'https://www.patreon.com/Yeonhwa726',
226 'timestamp': 1705150153,
227 'upload_date': '20240113',
228 'comment_count': int,
230 'thumbnail': r
're:^https?://.+',
232 'params': {'skip_download': 'm3u8'},
233 'expected_warnings': ['Failed to parse XML: not well-formed'],
235 # multiple attachments/embeds
236 'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
240 'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
241 'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
242 'uploader': 'Bradley Hall',
243 'uploader_id': '24401883',
244 'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
245 'channel_id': '3193932',
246 'channel_url': 'https://www.patreon.com/bradleyhallguitar',
247 'channel_follower_count': int,
248 'timestamp': 1710777855,
249 'upload_date': '20240318',
251 'comment_count': int,
252 'thumbnail': r
're:^https?://.+',
254 'skip': 'Patron-only content',
256 # Contains a comment reply in the 'included' section
257 'url': 'https://www.patreon.com/posts/114721679',
261 'upload_date': '20241025',
262 'uploader': 'Japanalysis',
264 'thumbnail': r
're:^https?://.+',
265 'comment_count': int,
266 'title': 'Karasawa Part 2',
267 'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
268 'uploader_url': 'https://www.patreon.com/japanalysis',
269 'uploader_id': '80504268',
270 'channel_url': 'https://www.patreon.com/japanalysis',
271 'channel_follower_count': int,
272 'timestamp': 1729897015,
273 'channel_id': '9346307',
275 'params': {'getcomments': True},
277 _RETURN_TYPE
= 'video'
279 def _real_extract(self
, url
):
280 video_id
= self
._match
_id
(url
)
281 post
= self
._call
_api
(
282 f
'posts/{video_id}', video_id
, query
={
283 'fields[media]': 'download_url,mimetype,size_bytes',
284 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title,current_user_can_view',
285 'fields[user]': 'full_name,url',
286 'fields[post_tag]': 'value',
287 'fields[campaign]': 'url,name,patron_count',
288 'json-api-use-default-includes': 'false',
289 'include': 'audio,user,user_defined_tags,campaign,attachments_media',
291 attributes
= post
['data']['attributes']
292 info
= traverse_obj(attributes
, {
293 'title': ('title', {str.strip
}),
294 'description': ('content', {clean_html}
),
295 'thumbnail': ('image', ('large_url', 'url'), {url_or_none}
, any
),
296 'timestamp': ('published_at', {parse_iso8601}
),
297 'like_count': ('like_count', {int_or_none}
),
298 'comment_count': ('comment_count', {int_or_none}
),
303 for include
in traverse_obj(post
, ('included', lambda _
, v
: v
['type'])):
304 include_type
= include
['type']
305 if include_type
== 'media':
306 media_attributes
= traverse_obj(include
, ('attributes', {dict}
)) or {}
307 download_url
= url_or_none(media_attributes
.get('download_url'))
308 ext
= mimetype2ext(media_attributes
.get('mimetype'))
310 # if size_bytes is None, this media file is likely unavailable
311 # See: https://github.com/yt-dlp/yt-dlp/issues/4608
312 size_bytes
= int_or_none(media_attributes
.get('size_bytes'))
313 if download_url
and ext
in KNOWN_EXTENSIONS
and size_bytes
is not None:
316 'id': f
'{video_id}-{idx}',
318 'filesize': size_bytes
,
322 elif include_type
== 'user':
323 info
.update(traverse_obj(include
, {
324 'uploader': ('attributes', 'full_name', {str}
),
325 'uploader_id': ('id', {str_or_none}
),
326 'uploader_url': ('attributes', 'url', {url_or_none}
),
329 elif include_type
== 'post_tag':
330 if post_tag
:= traverse_obj(include
, ('attributes', 'value', {str}
)):
331 info
.setdefault('tags', []).append(post_tag
)
333 elif include_type
== 'campaign':
334 info
.update(traverse_obj(include
, {
335 'channel': ('attributes', 'title', {str}
),
336 'channel_id': ('id', {str_or_none}
),
337 'channel_url': ('attributes', 'url', {url_or_none}
),
338 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}
),
341 # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
342 headers
= {'referer': 'https://patreon.com/'}
344 # handle Vimeo embeds
345 if traverse_obj(attributes
, ('embed', 'provider')) == 'Vimeo':
346 v_url
= urllib
.parse
.unquote(self
._html
_search
_regex
(
347 r
'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
348 traverse_obj(attributes
, ('embed', 'html', {str}
)), 'vimeo url', fatal
=False) or '')
349 if url_or_none(v_url
) and self
._request
_webpage
(
350 v_url
, video_id
, 'Checking Vimeo embed URL', headers
=headers
,
351 fatal
=False, errnote
=False, expected_status
=429): # 429 is TLS fingerprint rejection
352 entries
.append(self
.url_result(
353 VimeoIE
._smuggle
_referrer
(v_url
, 'https://patreon.com/'),
354 VimeoIE
, url_transparent
=True))
356 embed_url
= traverse_obj(attributes
, ('embed', 'url', {url_or_none}
))
357 if embed_url
and (urlh
:= self
._request
_webpage
(
358 embed_url
, video_id
, 'Checking embed URL', headers
=headers
,
359 fatal
=False, errnote
=False, expected_status
=403)):
360 # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
361 # to check for "Sorry, we couldn&rsquo;t find that page" in the meta description tag
362 meta_description
= clean_html(self
._html
_search
_meta
(
363 'description', self
._webpage
_read
_content
(urlh
, embed_url
, video_id
, fatal
=False), default
=None))
364 # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
365 if ((urlh
.status
!= 403 and meta_description
!= 'Sorry, we couldn’t find that page')
366 or VidsIoIE
.suitable(embed_url
)):
367 entries
.append(self
.url_result(smuggle_url(embed_url
, headers
)))
369 post_file
= traverse_obj(attributes
, ('post_file', {dict}
))
371 name
= post_file
.get('name')
372 ext
= determine_ext(name
)
373 if ext
in KNOWN_EXTENSIONS
:
377 'url': post_file
['url'],
379 elif name
== 'video' or determine_ext(post_file
.get('url')) == 'm3u8':
380 formats
, subtitles
= self
._extract
_m
3u8_formats
_and
_subtitles
(post_file
['url'], video_id
)
384 'subtitles': subtitles
,
387 can_view_post
= traverse_obj(attributes
, 'current_user_can_view')
389 if can_view_post
and info
.get('comment_count'):
390 comments
= self
.extract_comments(video_id
)
392 if not entries
and can_view_post
is False:
393 self
.raise_no_formats('You do not have access to this post', video_id
=video_id
, expected
=True)
395 self
.raise_no_formats('No supported media found in this post', video_id
=video_id
, expected
=True)
396 elif len(entries
) == 1:
397 info
.update(entries
[0])
399 for entry
in entries
:
401 return self
.playlist_result(entries
, video_id
, **info
, __post_extractor
=comments
)
403 info
['id'] = video_id
404 info
['__post_extractor'] = comments
407 def _get_comments(self
, post_id
):
412 'include': 'parent.commenter.campaign,parent.post.user,parent.post.campaign.creator,parent.replies.parent,parent.replies.commenter.campaign,parent.replies.post.user,parent.replies.post.campaign.creator,commenter.campaign,post.user,post.campaign.creator,replies.parent,replies.commenter.campaign,replies.post.user,replies.post.campaign.creator,on_behalf_of_campaign',
413 'fields[comment]': 'body,created,is_by_creator',
414 'fields[user]': 'image_url,full_name,url',
415 'filter[flair]': 'image_tiny_url,name',
417 'json-api-version': 1.0,
418 'json-api-use-default-includes': 'false',
421 for page
in itertools
.count(1):
423 params
.update({'page[cursor]': cursor
} if cursor
else {})
424 response
= self
._call
_api
(
425 f
'posts/{post_id}/comments', post_id
, query
=params
, note
=f
'Downloading comments page {page}')
428 for comment
in traverse_obj(response
, (('data', 'included'), lambda _
, v
: v
['type'] == 'comment' and v
['id'])):
430 author_id
= traverse_obj(comment
, ('relationships', 'commenter', 'data', 'id'))
433 **traverse_obj(comment
, {
434 'id': ('id', {str_or_none}
),
435 'text': ('attributes', 'body', {str}
),
436 'timestamp': ('attributes', 'created', {parse_iso8601}
),
437 'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}
, any
),
438 'author_is_uploader': ('attributes', 'is_by_creator', {bool}
),
440 **traverse_obj(response
, (
441 'included', lambda _
, v
: v
['id'] == author_id
and v
['type'] == 'user', 'attributes', {
442 'author': ('full_name', {str}
),
443 'author_thumbnail': ('image_url', {url_or_none}
),
445 'author_id': author_id
,
448 if count
< traverse_obj(response
, ('meta', 'count')):
449 cursor
= traverse_obj(response
, ('data', -1, 'id'))
455 class PatreonCampaignIE(PatreonBaseIE
):
456 IE_NAME
= 'patreon:campaign'
457 _VALID_URL
= r
'''(?x)
458 https?://(?:www\.)?patreon\.com/(?:
459 (?:m|api/campaigns)/(?P<campaign_id>\d+)|
460 (?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
461 )(?:/posts)?/?(?:$|[?#])'''
463 'url': 'https://www.patreon.com/dissonancepod/',
465 'title': 'Cognitive Dissonance Podcast',
466 'channel_url': 'https://www.patreon.com/dissonancepod',
468 'description': r
're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*',
469 'channel_id': '80642',
470 'channel': 'Cognitive Dissonance Podcast',
472 'channel_follower_count': int,
473 'uploader_id': '87145',
474 'uploader_url': 'https://www.patreon.com/dissonancepod',
475 'uploader': 'Cognitive Dissonance Podcast',
476 'thumbnail': r
're:^https?://.*$',
478 'playlist_mincount': 68,
480 'url': 'https://www.patreon.com/m/4767637/posts',
482 'title': 'Not Just Bikes',
484 'channel_id': '4767637',
485 'channel_url': 'https://www.patreon.com/notjustbikes',
486 'description': r
're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*',
488 'channel': 'Not Just Bikes',
489 'uploader_url': 'https://www.patreon.com/notjustbikes',
491 'uploader_id': '37306634',
492 'thumbnail': r
're:^https?://.*$',
494 'playlist_mincount': 71,
496 'url': 'https://www.patreon.com/api/campaigns/4243769/posts',
498 'title': 'Second Thought',
499 'channel_follower_count': int,
501 'channel_id': '4243769',
502 'channel_url': 'https://www.patreon.com/secondthought',
503 'description': r
're:(?s).*Second Thought is an educational YouTube channel.*',
505 'channel': 'Second Thought',
506 'uploader_url': 'https://www.patreon.com/secondthought',
507 'uploader': 'JT Chapman',
508 'uploader_id': '32718287',
509 'thumbnail': r
're:^https?://.*$',
511 'playlist_mincount': 201,
513 'url': 'https://www.patreon.com/dissonancepod/posts',
514 'only_matching': True,
516 'url': 'https://www.patreon.com/m/5932659',
517 'only_matching': True,
519 'url': 'https://www.patreon.com/api/campaigns/4243769',
520 'only_matching': True,
523 def _entries(self
, campaign_id
):
526 'fields[post]': 'patreon_url,url',
527 'filter[campaign_id]': campaign_id
,
528 'filter[is_draft]': 'false',
529 'sort': '-published_at',
530 'json-api-use-default-includes': 'false',
533 for page
in itertools
.count(1):
535 params
.update({'page[cursor]': cursor
} if cursor
else {})
536 posts_json
= self
._call
_api
('posts', campaign_id
, query
=params
, note
=f
'Downloading posts page {page}')
538 cursor
= traverse_obj(posts_json
, ('meta', 'pagination', 'cursors', 'next'))
539 for post_url
in traverse_obj(posts_json
, ('data', ..., 'attributes', 'patreon_url')):
540 yield self
.url_result(urljoin('https://www.patreon.com/', post_url
), PatreonIE
)
545 def _real_extract(self
, url
):
547 campaign_id
, vanity
= self
._match
_valid
_url
(url
).group('campaign_id', 'vanity')
548 if campaign_id
is None:
549 webpage
= self
._download
_webpage
(url
, vanity
, headers
={'User-Agent': self
.patreon_user_agent
})
550 campaign_id
= self
._search
_nextjs
_data
(
551 webpage
, vanity
)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
554 'json-api-use-default-includes': 'false',
555 'fields[user]': 'full_name,url',
556 'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url',
557 'include': 'creator',
560 campaign_response
= self
._call
_api
(
561 f
'campaigns/{campaign_id}', campaign_id
,
562 note
='Downloading campaign info', fatal
=False,
565 campaign_info
= campaign_response
.get('data') or {}
566 channel_name
= traverse_obj(campaign_info
, ('attributes', 'name'))
567 user_info
= traverse_obj(
568 campaign_response
, ('included', lambda _
, v
: v
['type'] == 'user'),
569 default
={}, expected_type
=dict, get_all
=False)
574 'title': channel_name
,
575 'entries': self
._entries
(campaign_id
),
576 'description': clean_html(traverse_obj(campaign_info
, ('attributes', 'summary'))),
577 'channel_url': traverse_obj(campaign_info
, ('attributes', 'url')),
578 'channel_follower_count': int_or_none(traverse_obj(campaign_info
, ('attributes', 'patron_count'))),
579 'channel_id': campaign_id
,
580 'channel': channel_name
,
581 'uploader_url': traverse_obj(user_info
, ('attributes', 'url')),
582 'uploader_id': str_or_none(user_info
.get('id')),
583 'uploader': traverse_obj(user_info
, ('attributes', 'full_name')),
584 'playlist_count': traverse_obj(campaign_info
, ('attributes', 'creation_count')),
585 'age_limit': 18 if traverse_obj(campaign_info
, ('attributes', 'is_nsfw')) else 0,
586 'thumbnail': url_or_none(traverse_obj(campaign_info
, ('attributes', 'avatar_photo_url'))),