yt_dlp/extractor/vocaroo.py

   1 from .common import InfoExtractor
   2 from ..networking import HEADRequest
   3 from ..utils import float_or_none
   4
   5
   6 class VocarooIE(InfoExtractor):
   7     _VALID_URL = r'https?://(?:www\.)?(?:vocaroo\.com|voca\.ro)/(?:embed/)?(?P<id>\w+)'
   8     _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?vocaroo\.com/embed/.+?)\1']
   9     _TESTS = [
  10         {
  11             'url': 'https://vocaroo.com/1de8yA3LNe77',
  12             'md5': 'c557841d5e50261777a6585648adf439',
  13             'info_dict': {
  14                 'id': '1de8yA3LNe77',
  15                 'ext': 'mp3',
  16                 'title': 'Vocaroo video #1de8yA3LNe77',
  17                 'timestamp': 1675059800.370,
  18                 'upload_date': '20230130',
  19             },
  20         },
  21         {
  22             'url': 'https://vocaroo.com/embed/12WqtjLnpj6g?autoplay=0',
  23             'only_matching': True,
  24         },
  25         {
  26             'url': 'https://voca.ro/12D52rgpzkB0',
  27             'only_matching': True,
  28         },
  29     ]
  30
  31     _WEBPAGE_TESTS = [
  32         {
  33             'url': 'https://qbnu.github.io/cool.html',
  34             'md5': 'f322e529275dd8a47994919eeac404a5',
  35             'info_dict': {
  36                 'id': '19cgWmKO6AmC',
  37                 'ext': 'mp3',
  38                 'title': 'Vocaroo video #19cgWmKO6AmC',
  39                 'timestamp': 1675093841.408,
  40                 'upload_date': '20230130',
  41             },
  42         },
  43     ]
  44
  45     def _real_extract(self, url):
  46         audio_id = self._match_id(url)
  47         if len(audio_id) == 10 or (len(audio_id) == 12 and audio_id[0] == '1'):
  48             media_subdomain = 'media1'
  49         else:
  50             media_subdomain = 'media'
  51
  52         url = f'https://{media_subdomain}.vocaroo.com/mp3/{audio_id}'
  53         http_headers = {'Referer': 'https://vocaroo.com/'}
  54         resp = self._request_webpage(HEADRequest(url), audio_id, headers=http_headers)
  55         return {
  56             'id': audio_id,
  57             'title': '',
  58             'url': url,
  59             'ext': 'mp3',
  60             'timestamp': float_or_none(resp.headers.get('x-bz-upload-timestamp'), scale=1000),
  61             'vcodec': 'none',
  62             'http_headers': http_headers,
  63         }