yt_dlp/extractor/steam.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     extract_attributes,
   7     get_element_by_class,
   8     str_or_none,
   9 )
  10
  11
  12 class SteamIE(InfoExtractor):
  13     _VALID_URL = r'''(?x)
  14         https?://(?:store\.steampowered|steamcommunity)\.com/
  15             (?:agecheck/)?
  16             (?P<urltype>video|app)/ #If the page is only for videos or for a game
  17             (?P<gameID>\d+)/?
  18             (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
  19         |
  20         https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
  21     '''
  22     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
  23     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
  24     _TESTS = [{
  25         'url': 'http://store.steampowered.com/video/105600/',
  26         'playlist': [
  27             {
  28                 'md5': '695242613303ffa2a4c44c9374ddc067',
  29                 'info_dict': {
  30                     'id': '256785003',
  31                     'ext': 'mp4',
  32                     'title': 'Terraria video 256785003',
  33                     'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
  34                 },
  35             },
  36             {
  37                 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
  38                 'info_dict': {
  39                     'id': '2040428',
  40                     'ext': 'mp4',
  41                     'title': 'Terraria video 2040428',
  42                     'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
  43                 },
  44             },
  45         ],
  46         'info_dict': {
  47             'id': '105600',
  48             'title': 'Terraria',
  49         },
  50         'params': {
  51             'playlistend': 2,
  52         },
  53     }, {
  54         'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
  55         'info_dict': {
  56             'id': '271590',
  57             'title': 'Grand Theft Auto V',
  58         },
  59         'playlist_count': 23,
  60     }]
  61
  62     def _real_extract(self, url):
  63         m = self._match_valid_url(url)
  64         file_id = m.group('fileID')
  65         if file_id:
  66             video_url = url
  67             playlist_id = file_id
  68         else:
  69             game_id = m.group('gameID')
  70             playlist_id = game_id
  71             video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id
  72
  73         self._set_cookie('steampowered.com', 'wants_mature_content', '1')
  74         self._set_cookie('steampowered.com', 'birthtime', '944006401')
  75         self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')
  76
  77         webpage = self._download_webpage(video_url, playlist_id)
  78
  79         if re.search('<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
  80             video_url = self._AGECHECK_TEMPLATE % playlist_id
  81             self.report_age_confirmation()
  82             webpage = self._download_webpage(video_url, playlist_id)
  83
  84         videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
  85         entries = []
  86         playlist_title = get_element_by_class('apphub_AppName', webpage)
  87         for movie, movie_id in videos:
  88             if not movie:
  89                 continue
  90             movie = extract_attributes(movie)
  91             if not movie_id:
  92                 continue
  93             entry = {
  94                 'id': movie_id,
  95                 'title': f'{playlist_title} video {movie_id}',
  96             }
  97             formats = []
  98             if movie:
  99                 entry['thumbnail'] = movie.get('data-poster')
 100                 for quality in ('', '-hd'):
 101                     for ext in ('webm', 'mp4'):
 102                         video_url = movie.get(f'data-{ext}{quality}-source')
 103                         if video_url:
 104                             formats.append({
 105                                 'format_id': ext + quality,
 106                                 'url': video_url,
 107                             })
 108             entry['formats'] = formats
 109             entries.append(entry)
 110         embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
 111         for evideos in embedded_videos:
 112             evideos = extract_attributes(evideos).get('src')
 113             video_id = self._search_regex(r'youtube\.com/embed/([0-9A-Za-z_-]{11})', evideos, 'youtube_video_id', default=None)
 114             if video_id:
 115                 entries.append({
 116                     '_type': 'url_transparent',
 117                     'id': video_id,
 118                     'url': video_id,
 119                     'ie_key': 'Youtube',
 120                 })
 121         if not entries:
 122             raise ExtractorError('Could not find any videos')
 123
 124         return self.playlist_result(entries, playlist_id, playlist_title)
 125
 126
 127 class SteamCommunityBroadcastIE(InfoExtractor):
 128     _VALID_URL = r'https?://steamcommunity\.(?:com)/broadcast/watch/(?P<id>\d+)'
 129     _TESTS = [{
 130         'url': 'https://steamcommunity.com/broadcast/watch/76561199073851486',
 131         'info_dict': {
 132             'id': '76561199073851486',
 133             'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
 134             'ext': 'mp4',
 135             'uploader_id': '1113585758',
 136             'uploader': 'pepperm!nt',
 137             'live_status': 'is_live',
 138         },
 139         'skip': 'Stream has ended',
 140     }]
 141
 142     def _real_extract(self, url):
 143         video_id = self._match_id(url)
 144         webpage = self._download_webpage(url, video_id)
 145         json_data = self._download_json(
 146             'https://steamcommunity.com/broadcast/getbroadcastmpd/',
 147             video_id, query={'steamid': f'{video_id}'})
 148
 149         formats, subs = self._extract_m3u8_formats_and_subtitles(json_data['hls_url'], video_id)
 150
 151         ''' # We cannot download live dash atm
 152         mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(json_data['url'], video_id)
 153         formats.extend(mpd_formats)
 154         self._merge_subtitles(mpd_subs, target=subs)
 155         '''
 156
 157         uploader_json = self._download_json(
 158             'https://steamcommunity.com/actions/ajaxresolveusers',
 159             video_id, query={'steamids': video_id})[0]
 160
 161         return {
 162             'id': video_id,
 163             'title': self._generic_title('', webpage),
 164             'formats': formats,
 165             'live_status': 'is_live',
 166             'view_count': json_data.get('num_view'),
 167             'uploader': uploader_json.get('persona_name'),
 168             'uploader_id': str_or_none(uploader_json.get('accountid')),
 169             'subtitles': subs,
 170         }