yt_dlp/extractor/playtvak.py

   1 import urllib.parse
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     int_or_none,
   7     parse_iso8601,
   8     qualities,
   9 )
  10
  11
  12 class PlaytvakIE(InfoExtractor):
  13     IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz'
  14     _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)'
  15     _TESTS = [{
  16         'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko',
  17         'md5': '4525ae312c324b4be2f4603cc78ceb4a',
  18         'info_dict': {
  19             'id': 'A150730_150323_hodinovy-manzel_kuko',
  20             'ext': 'mp4',
  21             'title': 'Vyžeňte vosy a sršně ze zahrady',
  22             'description': 'md5:4436e61b7df227a093778efb7e373571',
  23             'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
  24             'duration': 279,
  25             'timestamp': 1438732860,
  26             'upload_date': '20150805',
  27             'is_live': False,
  28         },
  29     }, {  # live video test
  30         'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat',
  31         'info_dict': {
  32             'id': 'A150624_164934_planespotting_cat',
  33             'ext': 'flv',
  34             'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  35             'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
  36             'is_live': True,
  37         },
  38         'params': {
  39             'skip_download': True,  # requires rtmpdump
  40         },
  41     }, {  # another live stream, this one without Misc.videoFLV
  42         'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
  43         'info_dict': {
  44             'id': 'A151218_145728_hlavni-nadrazi_plap',
  45             'ext': 'flv',
  46             'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  47             'is_live': True,
  48         },
  49         'params': {
  50             'skip_download': True,  # requires rtmpdump
  51         },
  52     }, {  # idnes.cz
  53         'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku',
  54         'md5': '819832ba33cd7016e58a6658577fe289',
  55         'info_dict': {
  56             'id': 'A150809_104116_domaci_pku',
  57             'ext': 'mp4',
  58             'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
  59             'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
  60             'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
  61             'duration': 39,
  62             'timestamp': 1438969140,
  63             'upload_date': '20150807',
  64             'is_live': False,
  65         },
  66     }, {  # lidovky.cz
  67         'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE',
  68         'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8',
  69         'info_dict': {
  70             'id': 'A150808_214044_ln-video_ELE',
  71             'ext': 'mp4',
  72             'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
  73             'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
  74             'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
  75             'timestamp': 1439052180,
  76             'upload_date': '20150808',
  77             'is_live': False,
  78         },
  79     }, {  # metro.cz
  80         'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row',
  81         'md5': '84fc1deedcac37b7d4a6ccae7c716668',
  82         'info_dict': {
  83             'id': 'A141111_173251_metro-extra_row',
  84             'ext': 'mp4',
  85             'title': 'Recesisté udělali z billboardu kolotoč',
  86             'description': 'md5:7369926049588c3989a66c9c1a043c4c',
  87             'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
  88             'timestamp': 1415725500,
  89             'upload_date': '20141111',
  90             'is_live': False,
  91         },
  92     }, {
  93         'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko',
  94         'only_matching': True,
  95     }]
  96
  97     def _real_extract(self, url):
  98         video_id = self._match_id(url)
  99
 100         webpage = self._download_webpage(url, video_id)
 101
 102         info_url = self._html_search_regex(
 103             r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
 104
 105         parsed_url = urllib.parse.urlparse(info_url)
 106
 107         qs = urllib.parse.parse_qs(parsed_url.query)
 108         qs.update({
 109             'reklama': ['0'],
 110             'type': ['js'],
 111         })
 112
 113         info_url = urllib.parse.urlunparse(
 114             parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
 115
 116         json_info = self._download_json(
 117             info_url, video_id,
 118             transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
 119
 120         item = None
 121         for i in json_info['items']:
 122             if i.get('type') == 'video' or i.get('type') == 'stream':
 123                 item = i
 124                 break
 125         if not item:
 126             raise ExtractorError('No suitable stream found')
 127
 128         quality = qualities(('low', 'middle', 'high'))
 129
 130         formats = []
 131         for fmt in item['video']:
 132             video_url = fmt.get('file')
 133             if not video_url:
 134                 continue
 135
 136             format_ = fmt['format']
 137             format_id = '{}_{}'.format(format_, fmt['quality'])
 138             preference = None
 139
 140             if format_ in ('mp4', 'webm'):
 141                 ext = format_
 142             elif format_ == 'rtmp':
 143                 ext = 'flv'
 144             elif format_ == 'apple':
 145                 ext = 'mp4'
 146                 # Some streams have mp3 audio which does not play
 147                 # well with ffmpeg filter aac_adtstoasc
 148                 preference = -10
 149             elif format_ == 'adobe':  # f4m manifest fails with 404 in 80% of requests
 150                 continue
 151             else:  # Other formats not supported yet
 152                 continue
 153
 154             formats.append({
 155                 'url': video_url,
 156                 'ext': ext,
 157                 'format_id': format_id,
 158                 'quality': quality(fmt.get('quality')),
 159                 'preference': preference,
 160             })
 161
 162         title = item['title']
 163         is_live = item['type'] == 'stream'
 164         description = self._og_search_description(webpage, default=None) or self._html_search_meta(
 165             'description', webpage, 'description', default=None)
 166         timestamp = None
 167         duration = None
 168         if not is_live:
 169             duration = int_or_none(item.get('length'))
 170             timestamp = item.get('published')
 171             if timestamp:
 172                 timestamp = parse_iso8601(timestamp[:-5])
 173
 174         return {
 175             'id': video_id,
 176             'title': title,
 177             'description': description,
 178             'thumbnail': item.get('image'),
 179             'duration': duration,
 180             'timestamp': timestamp,
 181             'is_live': is_live,
 182             'formats': formats,
 183         }