[ie/wistia] Support password-protected videos (#11100)
[yt-dlp3.git] / yt_dlp / extractor / nekohacker.py
blob537158e87b924c6c9203e187ccaf046e1497d61e
1 import re
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 determine_ext,
7 extract_attributes,
8 get_element_by_class,
9 get_element_text_and_html_by_tag,
10 parse_duration,
11 traverse_obj,
12 try_call,
13 url_or_none,
class NekoHackerIE(InfoExtractor):
    """Extractor for album pages on nekohacker.com.

    A page is turned into a playlist with one entry per ``<li>`` tag that
    carries a ``data-audiopath`` attribute inside the element with class
    ``playlist``. Pages without such an element are handed to the Generic
    extractor when they embed an iframe, except for Spotify embeds, which
    are rejected.
    """

    # The negative lookahead keeps this extractor off the ``free-dl`` paths.
    _VALID_URL = r'https?://(?:www\.)?nekohacker\.com/(?P<id>(?!free-dl)[\w-]+)'
    _TESTS = [{
        'url': 'https://nekohacker.com/nekoverse/',
        'info_dict': {
            'id': 'nekoverse',
            'title': 'Nekoverse',
        },
        'playlist': [
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/01-Spaceship.mp3',
                'md5': '44223701ebedba0467ebda4cc07fb3aa',
                'info_dict': {
                    'id': '1712',
                    'ext': 'mp3',
                    'title': 'Spaceship',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artist': 'Neko Hacker',
                    'track': 'Spaceship',
                    'track_number': 1,
                    'duration': 195.0,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3',
                'md5': '8f853c71719389d32bbbd3f1a87b3f08',
                'info_dict': {
                    'id': '1713',
                    'ext': 'mp3',
                    'title': 'City Runner',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artist': 'Neko Hacker',
                    'track': 'City Runner',
                    'track_number': 2,
                    'duration': 148.0,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3',
                'md5': '5a8a8ae852720cee4c0ac95c7d1a7450',
                'info_dict': {
                    'id': '1714',
                    'ext': 'mp3',
                    'title': 'Nature Talk',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artist': 'Neko Hacker',
                    'track': 'Nature Talk',
                    'track_number': 3,
                    'duration': 174.0,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3',
                'md5': 'd8e59a48061764e50d92386a294abd50',
                'info_dict': {
                    'id': '1715',
                    'ext': 'mp3',
                    'title': 'Crystal World',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artist': 'Neko Hacker',
                    'track': 'Crystal World',
                    'track_number': 4,
                    'duration': 199.0,
                },
            },
        ],
    }, {
        'url': 'https://nekohacker.com/susume/',
        'info_dict': {
            'id': 'susume',
            'title': '進め!むじなカンパニー',
        },
        'playlist': [
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-feat.-六科なじむ-CV_-日高里菜-割戶真友-CV_-金元寿子-軽井沢ユキ-CV_-上坂すみれ-出稼ぎガルシア-CV_-金子彩花-.mp3',
                'md5': 'fb13f008aa81f26ba48f91fd2d6186ce',
                'info_dict': {
                    'id': '711',
                    'ext': 'mp3',
                    'title': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artist': 'Neko Hacker',
                    'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
                    'track_number': 1,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3',
                'md5': '028803f70241df512b7764e73396fdd1',
                'info_dict': {
                    'id': '709',
                    'ext': 'mp3',
                    'title': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artist': 'Neko Hacker',
                    'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
                    'track_number': 2,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3',
                'md5': 'adde9e9a16e1da5e602b579c247d0fb9',
                'info_dict': {
                    'id': '710',
                    'ext': 'mp3',
                    'title': '進め!むじなカンパニー (instrumental)',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artist': 'Neko Hacker',
                    'track': '進め!むじなカンパニー (instrumental)',
                    'track_number': 3,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3',
                'md5': 'ebb0443039cf5f9ff7fd557ed9b23599',
                'info_dict': {
                    'id': '712',
                    'ext': 'mp3',
                    'title': 'むじな de なじむ (instrumental)',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artist': 'Neko Hacker',
                    'track': 'むじな de なじむ (instrumental)',
                    'track_number': 4,
                },
            },
        ],
    }]

    def _real_extract(self, url):
        """Build a playlist from the tracks listed on an album page.

        Raises ExtractorError when the page has neither a ``playlist``
        element nor an iframe with a usable ``src``, or when the iframe is
        a Spotify embed. Any other iframe is delegated to the Generic
        extractor.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)
        playlist = get_element_by_class('playlist', webpage)

        if not playlist:
            # No native player on the page: look at the first iframe instead.
            # try_call guards the lookup; fall back to '' so that
            # extract_attributes always receives a string.
            iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or ''
            iframe_src = url_or_none(extract_attributes(iframe).get('src'))
            if not iframe_src:
                raise ExtractorError('No playlist or embed found in webpage')
            elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
                raise ExtractorError('Spotify embeds are not supported', expected=True)
            return self.url_result(url, 'Generic')

        entries = []
        # Only the opening <li ...> tag is captured; all metadata lives in
        # its data-* attributes. Track numbers follow page order, from 1.
        for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
            entry = traverse_obj(extract_attributes(track), {
                'url': ('data-audiopath', {url_or_none}),
                'ext': ('data-audiopath', {determine_ext}),
                'id': 'data-trackid',
                'title': 'data-tracktitle',
                'track': 'data-tracktitle',
                'album': 'data-albumtitle',
                'duration': ('data-tracktime', {parse_duration}),
                # Strip the dots from the date, then keep the leading 8 digits.
                'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
                'thumbnail': ('data-albumart', {url_or_none}),
            })
            entries.append({
                **entry,
                'track_number': track_number,
                'artist': 'Neko Hacker',
                'vcodec': 'none',
                # 'ext' may be absent from entry when the tag only contains
                # the substring "data-audiopath" without that exact attribute;
                # .get() keeps this optional codec hint from raising KeyError.
                'acodec': 'mp3' if entry.get('ext') == 'mp3' else None,
            })

        # The playlist title is the album name of the first track, if any.
        return self.playlist_result(entries, playlist_id, traverse_obj(entries, (0, 'album')))