[ie/facebook] Support more groups URLs (#11576)
[yt-dlp3.git] / yt_dlp / extractor / nekohacker.py
blob7168a2080e0dce2b154c279ea84c6aa5f22da5d6
1 import re
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 determine_ext,
7 extract_attributes,
8 get_element_by_class,
9 parse_duration,
10 url_or_none,
12 from ..utils.traversal import find_element, traverse_obj
class NekoHackerIE(InfoExtractor):
    """Extractor for album/playlist pages on nekohacker.com.

    Each matched page is returned as a playlist whose entries are built from
    the ``<li data-audiopath=...>`` elements of the page's ``playlist`` element.
    """

    # The negative lookahead keeps the 'free-dl' page from being matched
    _VALID_URL = r'https?://(?:www\.)?nekohacker\.com/(?P<id>(?!free-dl)[\w-]+)'
    _TESTS = [{
        'url': 'https://nekohacker.com/nekoverse/',
        'info_dict': {
            'id': 'nekoverse',
            'title': 'Nekoverse',
        },
        'playlist': [
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/01-Spaceship.mp3',
                'md5': '44223701ebedba0467ebda4cc07fb3aa',
                'info_dict': {
                    'id': '1712',
                    'ext': 'mp3',
                    'title': 'Spaceship',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artists': ['Neko Hacker'],
                    'track': 'Spaceship',
                    'track_number': 1,
                    'duration': 195.0,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3',
                'md5': '8f853c71719389d32bbbd3f1a87b3f08',
                'info_dict': {
                    'id': '1713',
                    'ext': 'mp3',
                    'title': 'City Runner',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artists': ['Neko Hacker'],
                    'track': 'City Runner',
                    'track_number': 2,
                    'duration': 148.0,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3',
                'md5': '5a8a8ae852720cee4c0ac95c7d1a7450',
                'info_dict': {
                    'id': '1714',
                    'ext': 'mp3',
                    'title': 'Nature Talk',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artists': ['Neko Hacker'],
                    'track': 'Nature Talk',
                    'track_number': 3,
                    'duration': 174.0,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3',
                'md5': 'd8e59a48061764e50d92386a294abd50',
                'info_dict': {
                    'id': '1715',
                    'ext': 'mp3',
                    'title': 'Crystal World',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20221101',
                    'album': 'Nekoverse',
                    'artists': ['Neko Hacker'],
                    'track': 'Crystal World',
                    'track_number': 4,
                    'duration': 199.0,
                },
            },
        ],
    }, {
        'url': 'https://nekohacker.com/susume/',
        'info_dict': {
            'id': 'susume',
            'title': '進め!むじなカンパニー',
        },
        'playlist': [
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-feat.-六科なじむ-CV_-日高里菜-割戶真友-CV_-金元寿子-軽井沢ユキ-CV_-上坂すみれ-出稼ぎガルシア-CV_-金子彩花-.mp3',
                'md5': 'fb13f008aa81f26ba48f91fd2d6186ce',
                'info_dict': {
                    'id': '711',
                    'ext': 'mp3',
                    'title': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artists': ['Neko Hacker'],
                    'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
                    'track_number': 1,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3',
                'md5': '028803f70241df512b7764e73396fdd1',
                'info_dict': {
                    'id': '709',
                    'ext': 'mp3',
                    'title': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artists': ['Neko Hacker'],
                    'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
                    'track_number': 2,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3',
                'md5': 'adde9e9a16e1da5e602b579c247d0fb9',
                'info_dict': {
                    'id': '710',
                    'ext': 'mp3',
                    'title': '進め!むじなカンパニー (instrumental)',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artists': ['Neko Hacker'],
                    'track': '進め!むじなカンパニー (instrumental)',
                    'track_number': 3,
                },
            },
            {
                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3',
                'md5': 'ebb0443039cf5f9ff7fd557ed9b23599',
                'info_dict': {
                    'id': '712',
                    'ext': 'mp3',
                    'title': 'むじな de なじむ (instrumental)',
                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'release_date': '20210115',
                    'album': '進め!むじなカンパニー',
                    'artists': ['Neko Hacker'],
                    'track': 'むじな de なじむ (instrumental)',
                    'track_number': 4,
                },
            },
        ],
    }]

    def _real_extract(self, url):
        """Build a playlist result from the tracks listed on the page at *url*.

        When the page has no ``playlist`` element, the first iframe decides the
        outcome: no usable iframe ``src`` raises an ExtractorError, a Spotify
        embed raises an expected ExtractorError, and anything else hands the
        page URL over to the 'Generic' extractor.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)
        playlist = get_element_by_class('playlist', webpage)

        if not playlist:
            iframe_src = traverse_obj(webpage, (
                {find_element(tag='iframe', html=True)}, {extract_attributes}, 'src', {url_or_none}))
            if not iframe_src:
                raise ExtractorError('No playlist or embed found in webpage')
            if re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
                raise ExtractorError('Spotify embeds are not supported', expected=True)
            return self.url_result(url, 'Generic')

        player_params = self._search_json(
            r'var srp_player_params_[\da-f]+\s*=', webpage, 'player params', playlist_id, default={})
        # The artwork is shared by every track, so resolve it once up front.
        # traverse_obj is used instead of .get() so that an unexpected
        # (non-dict) JSON value does not raise.
        thumbnail = traverse_obj(player_params, ('artwork', {url_or_none}))

        entries = []
        for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
            entry = traverse_obj(extract_attributes(track), {
                'url': ('data-audiopath', {url_or_none}),
                'ext': ('data-audiopath', {determine_ext}),
                'id': 'data-trackid',
                'title': 'data-tracktitle',
                'track': 'data-tracktitle',
                'album': 'data-albumtitle',
                'duration': ('data-tracktime', {parse_duration}),
                # Strip the dots from the date and keep the leading 8 digits
                'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
            })
            entries.append({
                **entry,
                'thumbnail': thumbnail,
                'track_number': track_number,
                'artists': ['Neko Hacker'],
                'vcodec': 'none',
                # .get(): 'ext' may be missing from the entry when the
                # data-audiopath attribute carries no usable value
                'acodec': 'mp3' if entry.get('ext') == 'mp3' else None,
            })

        # The playlist title is taken from the first track's album, if any
        return self.playlist_result(entries, playlist_id, traverse_obj(entries, (0, 'album')))