[cleanup] Misc (#8968)
[yt-dlp.git] / yt_dlp / extractor / blerp.py
blob4631ad2e971bca0f97871dcaff6648283e9c3ad9
import json

from .common import InfoExtractor
from ..utils import ExtractorError, strip_or_none, traverse_obj
class BlerpIE(InfoExtractor):
    """Extractor for single soundbites hosted on blerp.com.

    The soundbite metadata and the MP3 URL are fetched with one GraphQL
    POST request to ``https://api.blerp.com/graphql``.
    """

    IE_NAME = 'blerp'
    _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
        'info_dict': {
            'id': '6320fe8745636cb4dd677a5a',
            'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
            'uploader': 'luminousaj',
            'uploader_id': '5fb81e51aa66ae000c395478',
            'ext': 'mp3',
            'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
        },
    }, {
        'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
        'info_dict': {
            'id': '5bc94ef4796001000498429f',
            'title': 'Yee',
            'uploader': '179617322678353920',
            'uploader_id': '5ba99cf71386730004552c42',
            'ext': 'mp3',
            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'],
        },
    }]

    # Name of the GraphQL operation; must match the operation declared in
    # _GRAPHQL_QUERY below.
    _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
    # Query sent to the API. Only `_id`, `title`, `userKeywords`,
    # `ownerObject { _id username }` and `audio { mp3 { url } }` are read by
    # _real_extract; the remaining fields are requested but not used here.
    _GRAPHQL_QUERY = (
        '''query webBitePageGetBite($_id: MongoID!) {
            web {
                biteById(_id: $_id) {
                    ...bitePageFrag
                    __typename
                }
                __typename
            }
        }

        fragment bitePageFrag on Bite {
            _id
            title
            userKeywords
            keywords
            color
            visibility
            isPremium
            owned
            price
            extraReview
            isAudioExists
            image {
                filename
                original {
                    url
                    __typename
                }
                __typename
            }
            userReactions {
                _id
                reactions
                createdAt
                __typename
            }
            topReactions
            totalSaveCount
            saved
            blerpLibraryType
            license
            licenseMetaData
            playCount
            totalShareCount
            totalFavoriteCount
            totalAddedToBoardCount
            userCategory
            userAudioQuality
            audioCreationState
            transcription
            userTranscription
            description
            createdAt
            updatedAt
            author
            listingType
            ownerObject {
                _id
                username
                profileImage {
                    filename
                    original {
                        url
                        __typename
                    }
                    __typename
                }
                __typename
            }
            transcription
            favorited
            visibility
            isCurated
            sourceUrl
            audienceRating
            strictAudienceRating
            ownerId
            reportObject {
                reportedContentStatus
                __typename
            }
            giphy {
                mp4
                gif
                __typename
            }
            audio {
                filename
                original {
                    url
                    __typename
                }
                mp3 {
                    url
                    __typename
                }
                __typename
            }
            __typename
        }

        ''')

    def _real_extract(self, url):
        """Fetch the soundbite referenced by *url* and build its info dict.

        Returns a dict with ``id``, ``url``, ``title``, ``uploader``,
        ``uploader_id``, ``ext`` and ``tags``.

        Raises ExtractorError when the API response carries no soundbite
        object under ``data.web.biteById``.
        """
        audio_id = self._match_id(url)

        payload = {
            'operationName': self._GRAPHQL_OPERATIONNAME,
            'query': self._GRAPHQL_QUERY,
            'variables': {
                '_id': audio_id,
            },
        }
        headers = {
            'Content-Type': 'application/json',
        }

        json_result = self._download_json(
            'https://api.blerp.com/graphql', audio_id,
            data=json.dumps(payload).encode('utf-8'), headers=headers)

        # A missing or null `biteById` used to surface as a bare
        # KeyError/TypeError; fail with an explicit message instead.
        bite_json = traverse_obj(json_result, ('data', 'web', 'biteById'), expected_type=dict)
        if not bite_json:
            raise ExtractorError(
                'Unable to find soundbite data in the API response', video_id=audio_id)

        # Strip each keyword and drop the ones that end up empty or are not strings.
        keywords = traverse_obj(bite_json, 'userKeywords', expected_type=list) or []
        tags = [tag for tag in map(strip_or_none, keywords) if tag]

        return {
            # Mandatory fields are subscripted directly so that a response
            # lacking them fails loudly rather than yielding a partial result.
            'id': bite_json['_id'],
            'url': bite_json['audio']['mp3']['url'],
            'title': bite_json['title'],
            'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
            'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
            'ext': 'mp3',
            # `or None` is applied to the finished list, so a soundbite
            # without usable keywords reports no tags rather than [].
            'tags': tags or None,
        }