4 from .common
import InfoExtractor
5 from ..networking
.exceptions
import HTTPError
20 from ..utils
.traversal
import traverse_obj
# Extractor class for single Newgrounds media pages (subclass of InfoExtractor).
# NOTE(review): the leading numbers on these lines look like line numbers carried
# over from the file this excerpt was extracted from, and tokens are split across
# lines; neither is part of the intended program text.
23 class NewgroundsIE(InfoExtractor
):
# Name under which stored credentials are looked up — presumably a .netrc
# machine entry; confirm against InfoExtractor.
24 _NETRC_MACHINE
= 'newgrounds'
# Accepts http(s) URLs on newgrounds.com (optional "www.") whose path is
# /audio/listen/<digits> or /portal/view/<digits>, optionally followed by
# /format/flash; the digits are captured as the `id` group.
25 _VALID_URL
= r
'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
27 'url': 'https://www.newgrounds.com/audio/listen/549479',
28 'md5': 'fe6033d297591288fa1c1f780386f07a',
32 'title': 'B7 - BusMode',
34 'timestamp': 1378892945,
35 'upload_date': '20130911',
38 'description': 'md5:b8b3c2958875189f07d8e313462e8c4f',
40 'thumbnail': r
're:^https://aicon\.ngfiles\.com/549/549479\.png',
43 'url': 'https://www.newgrounds.com/portal/view/1',
44 'md5': 'fbfb40e2dc765a7e830cb251d370d981',
49 'uploader': 'Brian-Beaton',
50 'timestamp': 955078533,
51 'upload_date': '20000407',
53 'description': 'Scrotum plays "catch."',
55 'thumbnail': r
're:^https://picon\.ngfiles\.com/0/flash_1_card\.png',
58 # source format unavailable, additional mp4 formats
59 'url': 'http://www.newgrounds.com/portal/view/689400',
63 'title': 'ZTV News Episode 8',
64 'uploader': 'ZONE-SAMA',
65 'timestamp': 1487983183,
66 'upload_date': '20170225',
68 'description': 'md5:aff9b330ec2e78ed93b1ad6d017accc6',
70 'thumbnail': r
're:^https://picon\.ngfiles\.com/689000/flash_689400_card\.png',
73 'skip_download': True,
76 'url': 'https://www.newgrounds.com/portal/view/297383',
77 'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
81 'title': 'Metal Gear Awesome',
82 'uploader': 'Egoraptor',
83 'timestamp': 1140681292,
84 'upload_date': '20060223',
86 'description': 'md5:9246c181614e23754571995104da92e0',
88 'thumbnail': r
're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
91 'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
92 'md5': '5d05585a9a0caca059f5abfbd3865524',
96 'title': 'Metal Gear Awesome',
97 'description': 'Metal Gear Awesome',
98 'uploader': 'Egoraptor',
99 'upload_date': '20060223',
100 'timestamp': 1140681292,
103 'thumbnail': r
're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
106 'url': 'https://www.newgrounds.com/portal/view/823109',
110 'title': 'Rouge Futa Fleshlight Fuck',
111 'description': 'I made a fleshlight model and I wanted to use it in an animation. Based on a video by CDNaturally.',
112 'uploader': 'DefaultUser12',
113 'upload_date': '20211122',
114 'timestamp': 1637611540,
117 'thumbnail': r
're:^https://picon\.ngfiles\.com/823000/flash_823109_card\.png',
# Login page URL: _perform_login downloads it to find the login form and also
# sends it as the Referer header of the login request.
126 _LOGIN_URL
= 'https://www.newgrounds.com/passport'
def _perform_login(self, username, password):
    """Log in to Newgrounds with the given credentials.

    Downloads the login page at ``_LOGIN_URL``, reads the ``action`` of its
    ``<form>``, joins it onto ``_LOGIN_URL`` and posts the page's hidden
    inputs together with the credentials, asking for a JSON reply with
    XHR-style headers.

    Args:
        username: Account name sent as the ``username`` form field.
        password: Password sent as the ``password`` form field.

    Raises:
        ExtractorError: Raised with ``expected=True`` when the JSON reply
            has string entries under ``errors``; the message is those
            entries joined by ``', '``.
    """
    login_webpage = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading login page')

    # default=None: a page without a matching <form action="..."> does not
    # raise here; the value is handed to urljoin as-is.
    form_action = self._search_regex(
        r'<form action="([^"]+)"', login_webpage, 'login endpoint', default=None)
    login_url = urljoin(self._LOGIN_URL, form_action)

    request_headers = {
        'Accept': 'application/json',
        'Referer': self._LOGIN_URL,
        'X-Requested-With': 'XMLHttpRequest',
    }
    # Hidden inputs go first so the explicit credentials win on a key clash.
    form_fields = {
        **self._hidden_inputs(login_webpage),
        'username': username,
        'password': password,
    }
    result = self._download_json(
        login_url, None, 'Logging in',
        headers=request_headers, data=urlencode_postdata(form_fields))

    if errors := traverse_obj(result, ('errors', ..., {str})):
        raise ExtractorError(', '.join(errors) or 'Unknown Error', expected=True)
# Extract one Newgrounds media item from `url`: download the page, collect
# format entries and scrape metadata from the HTML.
# NOTE(review): this excerpt is lossy. The leading numbers look like the original
# file's line numbers and several are absent (e.g. 146, 151-152, 155-157,
# 160-163, 196-197), so the `try:` opener, the branching between the steps
# below, the creation of `formats` and the opening of the returned dict are
# not visible here. Comments describe only what the visible lines show.
144 def _real_extract(self
, url
):
# Media id taken from the URL via self._match_id.
145 media_id
= self
._match
_id
(url
)
# Page download; the `except` that follows belongs to a `try` whose opening
# line is not part of this excerpt.
147 webpage
= self
._download
_webpage
(url
, media_id
)
148 except ExtractorError
as error
:
# An HTTP 401 as the cause of the failure is reported as "login required".
# What happens for other errors is on a line not visible here.
149 if isinstance(error
.cause
, HTTPError
) and error
.cause
.status
== 401:
150 self
.raise_login_required()
# Looks for a URL embedded in the page as the first `embedController([{"url": "..."`
# entry. The capture keeps the surrounding double quotes; default=None is
# supplied for the no-match case.
153 media_url_string
= self
._search
_regex
(
154 r
'embedController\(\[{"url"\s*:\s*("[^"]+"),', webpage
, 'media url', default
=None)
# The quoted capture is passed through self._parse_json to obtain the URL;
# this entry is labelled 'source'.
158 'url': self
._parse
_json
(media_url_string
, media_id
),
159 'format_id': 'source',
# JSON description of the video requested from /portal/video/<id> with
# XHR-style headers. NOTE(review): presumably only on the path where no
# embedded URL was found — the condition is not visible; confirm.
164 json_video
= self
._download
_json
(f
'https://www.newgrounds.com/portal/video/{media_id}', media_id
, headers
={
165 'Accept': 'application/json',
167 'X-Requested-With': 'XMLHttpRequest',
# Uploader from the JSON 'author' field when it is a string.
171 uploader
= traverse_obj(json_video
, ('author', {str}
))
# One (format_id, sources) pair per item of the JSON 'sources' mapping.
172 for format_id
, sources
in traverse_obj(json_video
, ('sources', {dict.items
}, ...)):
# Quality is the format id without its last character, converted with
# int_or_none — presumably ids look like "720p"; confirm.
173 quality
= int_or_none(format_id
[:-1])
175 'format_id': format_id
,
# One format entry per 'src' value that passes url_or_none.
178 } for url
in traverse_obj(sources
, (..., 'src', {url_or_none}
)))
# Uploader scraped from the page HTML, trying two alternative patterns.
181 uploader
= self
._html
_search
_regex
(
182 (r
'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
183 r
'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage
, 'uploader',
# With exactly one format, its filesize is taken from the page's "filesize" value.
186 if len(formats
) == 1:
187 formats
[0]['filesize'] = int_or_none(self
._html
_search
_regex
(
188 r
'"filesize"\s*:\s*["\']?
([\d
]+)["\']?,', webpage, 'filesize', default=None))
# Media type as stated in the page's "description" value; when it equals the
# audio-file marker the first format is flagged as having no video codec.
# NOTE(review): whether this sits inside the single-format check above cannot
# be told from this excerpt (indentation is lost).
190 video_type_description = self._html_search_regex(
191 r'"description
"\s*:\s*["\']?
([^
"\']+)["\']?
,', webpage, 'media
type', default=None)
192 if video_type_description == 'Audio File
':
193 formats[0]['vcodec
'] = 'none
'
195 self._check_formats(formats, media_id)
# Metadata fields scraped from the page; these are entries of a dict whose
# opening line is not visible here (presumably the returned info dict).
198 'title
': self._html_extract_title(webpage),
199 'uploader
': uploader,
200 'timestamp
': unified_timestamp(self._search_regex(
201 r'itemprop
="(?:uploadDate|datePublished)"\s
+content
="([^"]+)"',
202 webpage, 'timestamp', default=None)),
203 'duration': parse_duration(self._html_search_regex(
204 r'"duration
"\s*:\s*["\']?
(\d
+)["\']?', webpage, 'duration', default=None)),
206 'thumbnail': self._og_search_thumbnail(webpage),
# Text of the element with id "author_comments" is preferred; the og
# description is used when that yields nothing. The key this value belongs
# to is on a line not visible here.
208 clean_html(get_element_by_id('author_comments', webpage))
209 or self._og_search_description(webpage)),
# The rating letter captured from the <h2 class="rated-?"> element is
# looked up in self._AGE_LIMIT (defined outside this excerpt); 'e' is the
# default supplied to the search.
210 'age_limit': self._AGE_LIMIT.get(self._html_search_regex(
211 r'<h2\s+class=["\']rated
-([etma
])["\']', webpage, 'age_limit', default='e')),
212 'view_count': parse_count(self._html_search_regex(
213 r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>',
214 webpage, 'view count', default=None)),
# Extractor for Newgrounds listing pages (subclass of InfoExtractor).
# NOTE(review): the leading numbers look like line numbers carried over from the
# file this excerpt was extracted from; they are not part of the program text.
218 class NewgroundsPlaylistIE(InfoExtractor):
219 IE_NAME = 'Newgrounds:playlist'
# Accepts /collection/<id> and /<segment>/search/<segment>/<id> paths on
# newgrounds.com; the final segment (up to any of / ? # &) is captured as `id`.
220 _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
222 'url': 'https://www.newgrounds.com/collection/cats',
227 'playlist_mincount': 45,
229 'url': 'https://www.newgrounds.com/collection/dogs',
234 'playlist_mincount': 26,
236 'url': 'http://www.newgrounds.com/audio/search/title/cats',
237 'only_matching': True,
# Build a playlist result from the media links found on a listing page.
# NOTE(review): this excerpt is lossy. The leading numbers look like the original
# file's line numbers and some are absent (e.g. 252, 255, 258-260), so the
# creation of `entries`, the second argument of re.findall and the body of the
# class filter are not visible here.
240 def _real_extract(self, url):
241 playlist_id = self._match_id(url)
243 webpage = self._download_webpage(url, playlist_id)
# Title lookup is given default=None, so the title may be None.
245 title = self._html_extract_title(webpage, default=None)
# Narrow the page to what follows the "column wide" div; default=webpage keeps
# the whole page when that div is not found.
248 webpage = self._search_regex(
249 r'(?s)<div[^>]+\bclass=["\']column
wide(.+)',
250 webpage, 'wide column
', default=webpage)
# Each match yields the whole <a ...> tag, the portal/view/<id> or
# audio/listen/<id> path, and the numeric id.
253 for a, path, media_id in re.findall(
254 r'(<a
[^
>]+\bhref
=["\'][^"\']+((?
:portal
/view|audio
/listen
)/(\d
+))[^
>]+>)',
# Anchors are filtered on their class attribute against the two submission
# class names; the action taken for non-matching anchors is on a line not
# visible here.
256 a_class = extract_attributes(a).get('class')
257 if a_class not in ('item
-portalsubmission
', 'item
-audiosubmission
'):
# Entry URL rebuilt on www.newgrounds.com and routed to NewgroundsIE.
261 f'https
://www
.newgrounds
.com
/{path}
',
262 ie=NewgroundsIE.ie_key(), video_id=media_id))
264 return self.playlist_result(entries, playlist_id, title)
# Extractor for a user's listing pages (subclass of InfoExtractor).
# NOTE(review): the leading numbers look like line numbers carried over from the
# file this excerpt was extracted from, and the string literals below are split
# across lines by the same extraction; neither is part of the program text.
267 class NewgroundsUserIE(InfoExtractor):
268 IE_NAME = 'Newgrounds
:user
'
# Reassembled, the pattern matches <name>.newgrounds.com/movies or /audio
# (optional trailing slash, then end of string, '#' or '?'); the subdomain is
# captured as the `id` group.
269 _VALID_URL = r'https?
://(?P
<id>[^\
.]+)\
.newgrounds\
.com
/(?
:movies|audio
)/?
(?
:[#?]|$)'
271 'url': 'https://burn7.newgrounds.com/audio',
275 'playlist_mincount': 150,
277 'url': 'https://burn7.newgrounds.com/movies',
281 'playlist_mincount': 2,
283 'url': 'https://brian-beaton.newgrounds.com/movies',
285 'id': 'brian-beaton',
287 'playlist_mincount': 10,
# Generator: fetches one page of a user's listing as JSON and yields one
# url_result per post found in it.
# NOTE(review): the leading numbers look like the original file's line numbers;
# 292 and 298 are absent from this excerpt. 298 presumably only closes the
# headers dict and the call, but 292 may be a real statement (e.g. an adjustment
# of `page` before it is used) — confirm against the full file.
291 def _fetch_page(self
, channel_id
, url
, page
):
# Listing requested from <url>?page=<page> with XHR-style headers.
293 posts_info
= self
._download
_json
(
294 f
'{url}?page={page}', channel_id
,
295 note
=f
'Downloading page {page}', headers
={
296 'Accept': 'application/json, text/javascript, */*; q = 0.01',
297 'X-Requested-With': 'XMLHttpRequest',
# Every string found two levels below 'items' is treated as the HTML of one post.
299 for post
in traverse_obj(posts_info
, ('items', ..., ..., {str}
)):
# Groups 1 and 2 of the link pattern: the portal/view/<id> or audio/listen/<id>
# path and the numeric id.
300 path
, media_id
= self
._search
_regex
(
301 r
'<a[^>]+\bhref=["\'][^
"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
302 post, 'url', group=(1, 2))
# Rebuilt URL on www.newgrounds.com, routed to NewgroundsIE.
303 yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
def _real_extract(self, url):
    """Return a playlist of the submissions listed on a user's page.

    The channel id is taken from ``url`` via ``self._match_id``. Entries come
    from an ``OnDemandPagedList`` that calls ``self._fetch_page(channel_id,
    url, page)`` with a page size of ``self._PAGE_SIZE`` (presumably fetching
    pages only as they are consumed — confirm against OnDemandPagedList).

    Args:
        url: The user listing URL; it is also the base URL for page requests.

    Returns:
        The value of ``self.playlist_result(entries, channel_id)``.
    """
    channel_id = self._match_id(url)
    # Bind the per-user arguments so the paged list only has to supply `page`.
    fetch_page = functools.partial(self._fetch_page, channel_id, url)
    entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
    return self.playlist_result(entries, channel_id)