yt_dlp/extractor/abcotvs.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     dict_get,
   4     int_or_none,
   5     try_get,
   6 )
   7
   8
   9 class ABCOTVSIE(InfoExtractor):
  10     IE_NAME = 'abcotvs'
  11     IE_DESC = 'ABC Owned Television Stations'
  12     _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
  13     _TESTS = [
  14         {
  15             'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
  16             'info_dict': {
  17                 'id': '472548',
  18                 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
  19                 'ext': 'mp4',
  20                 'title': 'East Bay museum celebrates synthesized music',
  21                 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
  22                 'thumbnail': r're:^https?://.*\.jpg$',
  23                 'timestamp': 1421118520,
  24                 'upload_date': '20150113',
  25             },
  26             'params': {
  27                 # m3u8 download
  28                 'skip_download': True,
  29             },
  30         },
  31         {
  32             'url': 'http://abc7news.com/472581',
  33             'only_matching': True,
  34         },
  35         {
  36             'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
  37             'only_matching': True,
  38         },
  39     ]
  40     _SITE_MAP = {
  41         '6abc': 'wpvi',
  42         'abc11': 'wtvd',
  43         'abc13': 'ktrk',
  44         'abc30': 'kfsn',
  45         'abc7': 'kabc',
  46         'abc7chicago': 'wls',
  47         'abc7news': 'kgo',
  48         'abc7ny': 'wabc',
  49     }
  50
  51     def _real_extract(self, url):
  52         site, display_id, video_id = self._match_valid_url(url).groups()
  53         display_id = display_id or video_id
  54         station = self._SITE_MAP[site]
  55
  56         data = self._download_json(
  57             'https://api.abcotvs.com/v2/content', display_id, query={
  58                 'id': video_id,
  59                 'key': f'otv.web.{station}.story',
  60                 'station': station,
  61             })['data']
  62         video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
  63         video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
  64         title = video.get('title') or video['linkText']
  65
  66         formats = []
  67         m3u8_url = video.get('m3u8')
  68         if m3u8_url:
  69             formats = self._extract_m3u8_formats(
  70                 video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
  71         mp4_url = video.get('mp4')
  72         if mp4_url:
  73             formats.append({
  74                 'abr': 128,
  75                 'format_id': 'https',
  76                 'height': 360,
  77                 'url': mp4_url,
  78                 'width': 640,
  79             })
  80
  81         image = video.get('image') or {}
  82
  83         return {
  84             'id': video_id,
  85             'display_id': display_id,
  86             'title': title,
  87             'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
  88             'thumbnail': dict_get(image, ('source', 'dynamicSource')),
  89             'timestamp': int_or_none(video.get('date')),
  90             'duration': int_or_none(video.get('length')),
  91             'formats': formats,
  92         }
  93
  94
  95 class ABCOTVSClipsIE(InfoExtractor):
  96     IE_NAME = 'abcotvs:clips'
  97     _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
  98     _TEST = {
  99         'url': 'https://clips.abcotvs.com/kabc/video/214814',
 100         'info_dict': {
 101             'id': '214814',
 102             'ext': 'mp4',
 103             'title': 'SpaceX launch pad explosion destroys rocket, satellite',
 104             'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
 105             'upload_date': '20160901',
 106             'timestamp': 1472756695,
 107         },
 108         'params': {
 109             # m3u8 download
 110             'skip_download': True,
 111         },
 112     }
 113
 114     def _real_extract(self, url):
 115         video_id = self._match_id(url)
 116         video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
 117         title = video_data['title']
 118         formats = self._extract_m3u8_formats(
 119             video_data['videoURL'].split('?')[0], video_id, 'mp4')
 120
 121         return {
 122             'id': video_id,
 123             'title': title,
 124             'description': video_data.get('description'),
 125             'thumbnail': video_data.get('thumbnailURL'),
 126             'duration': int_or_none(video_data.get('duration')),
 127             'timestamp': int_or_none(video_data.get('pubDate')),
 128             'formats': formats,
 129         }