from .common import InfoExtractor
from ..utils import merge_dicts, unified_timestamp, url_or_none
from ..utils.traversal import traverse_obj
class ZetlandDKArticleIE(InfoExtractor):
    """Extract the audio files attached to a zetland.dk story page."""

    # <id> is the full three-part slug; its first two 8-character segments are
    # additionally captured as <story_id> and <uploader_id>.
    _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
    _TESTS = [{
        'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
        # NOTE(review): the test harness may require further expected keys
        # (e.g. 'ext') that are not visible in this view of the file -
        # confirm with a live test run before merging.
        'info_dict': {
            'id': 'sO9aq2MY-a81VP3BY-66e69',
            'modified_date': '20240118',
            'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
            'upload_date': '20240116',
            'uploader_id': 'a81VP3BY',
            'modified_timestamp': 1705568739,
            'release_timestamp': 1705377592,
            'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
            'uploader': 'Helle Fuusager',
            'release_date': '20240116',
            'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
            'description': 'md5:9619d426772c133f5abb26db27f26a01',
            'timestamp': 1705377592,
            'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single story URL.

        Metadata is gathered from several places and combined with
        ``merge_dicts`` in this order: values derived from the URL, the
        story object in the Next.js page data, the ``metaInfo`` object in
        the Next.js page data, the page's HTML ``<meta>`` tags, and finally
        JSON-LD found in the page (looked up non-fatally).

        NOTE(review): ``merge_dicts`` is presumed to let earlier sources win
        over later ones - confirm against ``yt_dlp.utils``.
        """
        display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
        webpage = self._download_webpage(url, display_id)

        next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
        story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))

        # One format per valid URL listed under `audioFiles`; they are marked
        # as having no video stream because the source key is audio-only.
        # NOTE(review): the per-format fields are reconstructed - confirm.
        formats = [{
            'url': audio_url,
            'vcodec': 'none',
        } for audio_url in traverse_obj(
            story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none}))]

        return merge_dicts({
            'id': display_id,
            'formats': formats,
            'uploader_id': uploader_id,
        }, traverse_obj(story_data, {
            'title': ((('story_content', 'content', 'title'), 'title'), {str}),
            'uploader': ('sharer', 'name'),
            'uploader_id': ('sharer', 'sharer_id'),
            'description': ('story_content', 'content', 'socialDescription'),
            'series_id': ('story_content', 'meta', 'seriesId'),
            'release_timestamp': ('published_at', {unified_timestamp}),
            'modified_timestamp': ('revised_at', {unified_timestamp}),
        }, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
            'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
            'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
            'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
            'uploader_url': ('ld', 'author', 'url', {url_or_none}),
            'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
            'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
            'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
            'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
            # get_all=False mirrors the story_data lookup above so that the
            # branching alternatives yield a single value instead of a list.
        }), get_all=False), {
            'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
            'uploader': self._html_search_meta(['author'], webpage),
            'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
        }, self._search_json_ld(webpage, display_id, fatal=False))