[ie/facebook] Support more groups URLs (#11576)
[yt-dlp3.git] / yt_dlp / extractor / internetvideoarchive.py
blob9d2574cb068d6931be2e1d5e043a18aff024e21a
1 import json
2 import re
4 from .common import InfoExtractor
5 from ..utils import parse_qs
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net player/configuration URLs."""

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'Kick-Ass 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _build_json_url(query):
        """Return the player configuration URL for the given raw query string."""
        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        The `publishedid` and `customerid` query parameters of *url* are
        posted as a JSON body to the videojs7 settings endpoint; the response
        supplies the title, the HLS manifest URL, the poster and the
        description.
        """
        query = parse_qs(url)
        video_id = query['publishedid'][0]
        data = self._download_json(
            'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
            video_id, data=json.dumps({
                'customerid': query['customerid'][0],
                'publishedid': video_id,
            }).encode())
        title = data['Title']

        formats = self._extract_m3u8_formats(
            data['VideoUrl'], video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)

        # The HLS extraction is non-fatal, so `formats` may be empty here;
        # indexing it unconditionally would raise a bare IndexError.
        file_url = formats[0]['url'] if formats else ''
        if '.ism/' in file_url:
            def replace_url(manifest_name):
                # Swap everything after '.ism/' (up to the query string) for
                # the requested manifest name.
                return re.sub(r'\.ism/[^?]+', '.ism/' + manifest_name, file_url)

            # The same '.ism' base URL is probed for the other manifest
            # flavours; each probe is non-fatal.
            formats.extend(self._extract_f4m_formats(
                replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
            formats.extend(self._extract_mpd_formats(
                replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
            formats.extend(self._extract_ism_formats(
                replace_url('Manifest'), video_id, ism_id='mss', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': data.get('PosterUrl'),
            'description': data.get('Description'),
        }