2 from .common
import InfoExtractor
# Extractor class for individual ABC News video URLs; it subclasses AMPIE and
# sets IE_NAME to 'abcnews:video'.
# The visible URL-pattern fragments accept three shapes: a section path ending
# in video/<display_id>-, the video/embed?...id= and video/itemfeed?...id=
# endpoints, and fivethirtyeight.abcnews.go.com/video/embed/<digits>/.
# NOTE(review): this excerpt has gaps between its numbered lines (the start of
# the URL pattern and the test-dict delimiters are not visible), so these
# comments describe only what the visible lines establish.
10 class AbcNewsVideoIE(AMPIE
):
11 IE_NAME
= 'abcnews:video'
17 (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
18 video/(?:embed|itemfeed)\?.*?\bid=
20 fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
# Test fixtures: one case with expected metadata (display id, title,
# description, thumbnail pattern, timestamp, upload date), followed by cases
# flagged 'only_matching'.
26 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
30 'display_id': 'week-exclusive-irans-foreign-minister-zarif',
31 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
32 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
34 'thumbnail': r
're:^https?://.*\.jpg$',
35 'timestamp': 1380454200,
36 'upload_date': '20130929',
40 'skip_download': True,
43 'url': 'http://abcnews.go.com/video/embed?id=46979033',
44 'only_matching': True,
46 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
47 'only_matching': True,
49 'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
50 'only_matching': True,
52 'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
53 'only_matching': True,
# Resolve a matched video URL through the item feed.
# Reads the 'display_id' and 'id' groups from the URL match, passes the
# itemfeed URL for that id to self._extract_feed_info, and (per the visible
# 'display_id' entry) associates the display id with the result.
# NOTE(review): the statements surrounding the 'display_id' entry and the
# return are not visible in this excerpt - confirm against the full file.
56 def _real_extract(self
, url
):
57 mobj
= self
._match
_valid
_url
(url
)
58 display_id
= mobj
.group('display_id')
59 video_id
= mobj
.group('id')
# The feed is always requested from http://abcnews.go.com/video/itemfeed,
# whichever of the accepted URL shapes the input matched.
60 info_dict
= self
._extract
_feed
_info
(
61 f
'http://abcnews.go.com/video/itemfeed?id={video_id}')
64 'display_id': display_id
,
# Extractor class for ABC News story pages. _VALID_URL matches
# abcnews.go.com/<one or more path segments>/<display_id>/story?id=<digits>,
# and _real_extract returns a playlist built from the videos the story
# references.
# NOTE(review): this excerpt has gaps between its numbered lines (test-dict
# delimiters, the entries() definition and several guards are not visible),
# so these comments describe only what the visible lines establish.
69 class AbcNewsIE(InfoExtractor
):
71 _VALID_URL
= r
'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
# Test fixtures: two story URLs with expected metadata (the second also sets
# 'skip_download', 'playlist_items' and 'add_ie'), followed by cases flagged
# 'only_matching'.
75 'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
78 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
79 'description': 'Billingsley went from a child actor to Hollywood power player.',
83 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
87 'title': 'Justin Timberlake Drops Hints For Secret Single',
88 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
89 'upload_date': '20160505',
90 'timestamp': 1462442280,
94 'skip_download': True,
95 # The embedded YouTube video is blocked due to copyright issues
96 'playlist_items': '1',
98 'add_ie': ['AbcNewsVideo'],
100 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
101 'only_matching': True,
103 # inline.type == 'video'
104 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
105 'only_matching': True,
# Build a playlist from a story page.
# Downloads the page, parses the JSON object assigned to
# window['__abcnews__'], and takes the first element of
# page -> content -> story -> everscroll as the story. Entry data is built
# for the story's featured video and for each 'iframe' or 'video' inline in
# the article contents. The playlist is returned with the story id plus the
# article 'headline' and 'subHead' (presumably the playlist title and
# description - confirm against playlist_result).
108 def _real_extract(self
, url
):
109 story_id
= self
._match
_id
(url
)
110 webpage
= self
._download
_webpage
(url
, story_id
)
# The subscripts applied to the parsed JSON are unguarded: a page whose data
# lacks this path raises here instead of producing an empty playlist.
111 story
= self
._parse
_json
(self
._search
_regex
(
112 r
"window\['__abcnews__'\]\s*=\s*({.+?});",
113 webpage
, 'data'), story_id
)['page']['content']['story']['everscroll'][0]
# 'or {}' keeps the later .get() calls safe when the key is absent or null.
114 article_contents
= story
.get('articleContents') or {}
# Featured video: the feed value is looked up with try_get, and the entry is
# tagged with AbcNewsVideoIE's ie_key so that extractor handles it.
# NOTE(review): the condition guarding this entry and the line that supplies
# its 'url' are not visible in this excerpt.
117 featured_video
= story
.get('featuredVideo') or {}
118 feed
= try_get(featured_video
, lambda x
: x
['video']['feed'])
122 'id': featured_video
.get('id'),
123 'title': featured_video
.get('name'),
125 'thumbnail': featured_video
.get('images'),
126 'description': featured_video
.get('description'),
127 'timestamp': parse_iso8601(featured_video
.get('uploadDate')),
128 'duration': parse_duration(featured_video
.get('duration')),
129 'ie_key': AbcNewsVideoIE
.ie_key(),
# Inline items: an 'iframe' inline yields url_result() for its attrs.src
# value, while a 'video' inline yields an entry pointing at the
# abcnews.go.com/video/embed?id=<id> URL, tagged with AbcNewsVideoIE's ie_key.
# Other inline types fall through both branches.
132 for inline
in (article_contents
.get('inlines') or []):
133 inline_type
= inline
.get('type')
134 if inline_type
== 'iframe':
135 iframe_url
= try_get(inline
, lambda x
: x
['attrs']['src'])
137 yield self
.url_result(iframe_url
)
138 elif inline_type
== 'video':
# NOTE(review): the id is concatenated onto the embed URL below, which
# requires a str; any guard for a missing id is not visible in this excerpt.
139 video_id
= inline
.get('id')
144 'url': 'http://abcnews.go.com/video/embed?id=' + video_id
,
145 'thumbnail': inline
.get('imgSrc') or inline
.get('imgDefault'),
146 'description': inline
.get('description'),
147 'duration': parse_duration(inline
.get('duration')),
148 'ie_key': AbcNewsVideoIE
.ie_key(),
# entries() is defined on a line not visible in this excerpt (presumably the
# generator wrapping the yields above - confirm against the full file).
151 return self
.playlist_result(
152 entries(), story_id
, article_contents
.get('headline'),
153 article_contents
.get('subHead'))