yt_dlp/extractor/blerp.py

   1 import json
   2
   3 from .common import InfoExtractor
   4 from ..utils import strip_or_none, traverse_obj
   5
   6
   7 class BlerpIE(InfoExtractor):
   8     IE_NAME = 'blerp'
   9     _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
  10     _TESTS = [{
  11         'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
  12         'info_dict': {
  13             'id': '6320fe8745636cb4dd677a5a',
  14             'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
  15             'uploader': 'luminousaj',
  16             'uploader_id': '5fb81e51aa66ae000c395478',
  17             'ext': 'mp3',
  18             'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
  19         },
  20     }, {
  21         'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
  22         'info_dict': {
  23             'id': '5bc94ef4796001000498429f',
  24             'title': 'Yee',
  25             'uploader': '179617322678353920',
  26             'uploader_id': '5ba99cf71386730004552c42',
  27             'ext': 'mp3',
  28             'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'],
  29         },
  30     }]
  31
  32     _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
  33     _GRAPHQL_QUERY = (
  34         '''query webBitePageGetBite($_id: MongoID!) {
  35             web {
  36                 biteById(_id: $_id) {
  37                     ...bitePageFrag
  38                     __typename
  39                 }
  40                 __typename
  41             }
  42         }
  43
  44         fragment bitePageFrag on Bite {
  45             _id
  46             title
  47             userKeywords
  48             keywords
  49             color
  50             visibility
  51             isPremium
  52             owned
  53             price
  54             extraReview
  55             isAudioExists
  56             image {
  57                 filename
  58                 original {
  59                     url
  60                     __typename
  61                 }
  62                 __typename
  63             }
  64             userReactions {
  65                 _id
  66                 reactions
  67                 createdAt
  68                 __typename
  69             }
  70             topReactions
  71             totalSaveCount
  72             saved
  73             blerpLibraryType
  74             license
  75             licenseMetaData
  76             playCount
  77             totalShareCount
  78             totalFavoriteCount
  79             totalAddedToBoardCount
  80             userCategory
  81             userAudioQuality
  82             audioCreationState
  83             transcription
  84             userTranscription
  85             description
  86             createdAt
  87             updatedAt
  88             author
  89             listingType
  90             ownerObject {
  91                 _id
  92                 username
  93                 profileImage {
  94                     filename
  95                     original {
  96                         url
  97                         __typename
  98                     }
  99                     __typename
 100                 }
 101                 __typename
 102             }
 103             transcription
 104             favorited
 105             visibility
 106             isCurated
 107             sourceUrl
 108             audienceRating
 109             strictAudienceRating
 110             ownerId
 111             reportObject {
 112                 reportedContentStatus
 113                 __typename
 114             }
 115             giphy {
 116                 mp4
 117                 gif
 118                 __typename
 119             }
 120             audio {
 121                 filename
 122                 original {
 123                     url
 124                     __typename
 125                 }
 126                 mp3 {
 127                     url
 128                     __typename
 129                 }
 130                 __typename
 131             }
 132             __typename
 133         }
 134
 135         ''')
 136
 137     def _real_extract(self, url):
 138         audio_id = self._match_id(url)
 139
 140         data = {
 141             'operationName': self._GRAPHQL_OPERATIONNAME,
 142             'query': self._GRAPHQL_QUERY,
 143             'variables': {
 144                 '_id': audio_id,
 145             },
 146         }
 147
 148         headers = {
 149             'Content-Type': 'application/json',
 150         }
 151
 152         json_result = self._download_json(
 153             'https://api.blerp.com/graphql', audio_id,
 154             data=json.dumps(data).encode(), headers=headers)
 155
 156         bite_json = json_result['data']['web']['biteById']
 157
 158         return {
 159             'id': bite_json['_id'],
 160             'url': bite_json['audio']['mp3']['url'],
 161             'title': bite_json['title'],
 162             'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
 163             'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
 164             'ext': 'mp3',
 165             'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None),
 166         }