[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / nfhsnetwork.py
blobec746ecb17605ace909383708f06924ed3d06e2d
1 from .common import InfoExtractor
2 from ..utils import try_get, unified_strdate, unified_timestamp
class NFHSNetworkIE(InfoExtractor):
    """Extractor for event pages under ``nfhsnetwork.com/events/``."""

    IE_NAME = 'NFHSNetwork'
    # The id is an optional 'gam'/'evt'/'dd' prefix followed by up to ten word characters.
    _VALID_URL = r'https?://(?:www\.)?nfhsnetwork\.com/events/[\w-]+/(?P<id>(?:gam|evt|dd|)?[\w\d]{0,10})'
    _TESTS = [{
        # Auto-generated two-team sport (pixellot)
        'url': 'https://www.nfhsnetwork.com/events/rockford-high-school-rockford-mi/gamcf7e54cfbc',
        'info_dict': {
            'id': 'gamcf7e54cfbc',
            'ext': 'mp4',
            'title': 'Rockford vs Spring Lake - Girls Varsity Lacrosse 03/27/2021',
            'uploader': 'MHSAA - Michigan: Rockford High School, Rockford, MI',
            'uploader_id': 'cd2622cf76',
            'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi',
            'location': 'Rockford, Michigan',
            'timestamp': 1616859000,
            'upload_date': '20210327',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Non-sport activity with description
        'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c',
        'info_dict': {
            'id': 'evt4a30e3726c',
            'ext': 'mp4',
            'title': 'Drama Performance Limon High School vs. Limon High School - 12/13/2020',
            'description': 'Join the broadcast of the Limon High School Musical Performance at 2 PM.',
            'uploader': 'CHSAA: Limon High School, Limon, CO',
            'uploader_id': '7d2d121332',
            'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co',
            'location': 'Limon, Colorado',
            'timestamp': 1607893200,
            'upload_date': '20201213',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Postseason game
        'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45',
        'info_dict': {
            'id': 'dd8de71d45',
            'ext': 'mp4',
            'title': '2015 UA Holiday Classic Tournament: National Division - 12/26/2015',
            'uploader': 'SoCal Sports Productions',
            'uploader_id': '063dba0150',
            'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions',
            'location': 'San Diego, California',
            'timestamp': 1451187000,
            'upload_date': '20151226',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Video with no broadcasts object
        'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82',
        'info_dict': {
            'id': '9aa2f92f82',
            'ext': 'mp4',
            'title': 'Competitive Equity - 01/21/2015',
            'description': 'Committee members discuss points of their research regarding a competitive equity plan',
            'uploader': 'WIAA - Wisconsin: Wisconsin Interscholastic Athletic Association',
            'uploader_id': 'a49f7d1002',
            'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi',
            'location': 'Stevens Point, Wisconsin',
            'timestamp': 1421856000,
            'upload_date': '20150121',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single NFHS Network event URL.

        Downloads the event web page (used only for the title) and the
        ``game_or_event`` JSON document, then resolves the HLS manifest through
        the ``broadcasts``/``vods`` URL endpoint. Optional metadata that the
        API omits is returned as ``None`` instead of crashing or being
        stringified as ``'None'``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data = self._download_json(
            'https://cfunity.nfhsnetwork.com/v2/game_or_event/' + video_id,
            video_id)

        # The first publisher is expected to exist; fall back to an empty dict
        # so a malformed response ends in a clear "no stream" error below
        # rather than a TypeError/IndexError here.
        publisher = try_get(data, lambda x: x['publishers'][0], dict) or {}
        # Some (older) videos don't have a broadcasts object; use the first VOD instead.
        broadcast = try_get(
            publisher,
            (lambda x: x['broadcasts'][0], lambda x: x['vods'][0]),
            dict) or {}

        uploader = publisher.get('formatted_name') or publisher.get('name')
        uploader_id = publisher.get('publisher_key')

        # Default to '' so the substring test below cannot fail on a missing type.
        pub_type = publisher.get('type') or ''
        if 'association' in pub_type:
            uploader_prefix = 'associations'
        elif pub_type in ('publisher', 'affiliate'):
            uploader_prefix = 'affiliates'
        else:
            # Covers 'school' as well as any unknown or missing type.
            uploader_prefix = 'schools'

        slug = publisher.get('slug')
        uploader_page = f'https://www.nfhsnetwork.com/{uploader_prefix}/{slug}' if slug else None

        # Join only the parts that are present to avoid values such as 'None, None'.
        location = ', '.join(filter(None, (data.get('city'), data.get('state_name')))) or None

        description = broadcast.get('description')
        is_live = bool(broadcast.get('on_air') or broadcast.get('status') == 'on_air')

        timestamp = unified_timestamp(data.get('local_start_time'))
        upload_date = unified_strdate(data.get('local_start_time'))

        title = (
            self._og_search_title(webpage)
            or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title'))
        # Keep only the part of the page title before the first '|'.
        title = title.split('|')[0].strip()

        video_type = 'broadcasts' if is_live else 'vods'
        key = broadcast.get('key') if is_live else try_get(publisher, lambda x: x['vods'][0]['key'])

        m3u8_url = None
        if key:
            # Only query the URL endpoint when there is a key; otherwise the
            # request would target '.../None/url'.
            stream_info = self._download_json(
                f'https://cfunity.nfhsnetwork.com/v2/{video_type}/{key}/url',
                video_id)
            m3u8_url = (stream_info or {}).get('video_url')

        formats = []
        if m3u8_url:
            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=is_live)
        else:
            self.raise_no_formats('No playable stream URL found for this event', video_id=video_id)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_page,
            'location': location,
            'upload_date': upload_date,
            'is_live': is_live,
            '_format_sort_fields': ('res', 'tbr'),
        }