1 //! \file WikimediaJsonToArticleConverter.cpp
3 #include "WikimediaJsonToArticleConverter.h"
11 #include "WalkerException.h"
15 //! \todo really ugly workaround, passing in the
16 //! CollectionUtils::ArticleCollection instance...
//! Converts a JSON response into Article objects stored in \p articleCache.
//!
//! The text in \p json is parsed with JsonCpp. Every entry found under
//! "query" -> "pages" is looked up by its "title" in \p articleCache (and
//! created + added when the lookup yields nullptr), and the entries of its
//! "links" member are attached to it via Article::addLink(). The members of
//! an optional top-level "continue" object are copied into continuationData_.
//!
//! \param json         JSON text to parse.
//! \param articleCache collection that is queried with CollectionUtils::get()
//!                     and extended with CollectionUtils::add().
//! \return ContinuationStatus::ConversionNeedsMoreData when \c moreData is
//!         true, ContinuationStatus::ConversionCompleted otherwise.
//! \throws WalkerException with the message "Error parsing JSON".
//!
//! NOTE(review): this listing carries stray leading numbers and omits several
//! lines (braces, the declarations of `document`, `moreData` and `contData`,
//! and some guarding conditions). The comments below describe only what is
//! visible; anything depending on an omitted line is marked as an assumption.
18 WikimediaJsonToArticleConverter::ContinuationStatus
19 WikimediaJsonToArticleConverter::convert(
20 const std::string
& json
,
21 CollectionUtils::ArticleCollection
& articleCache
)
// Configure the reader builder via CharReaderBuilder::strictMode() and parse
// the input string through an istringstream into `document`.
24 Json::CharReaderBuilder crb
;
25 Json::CharReaderBuilder::strictMode(&crb
.settings_
);
26 std::istringstream
jsonStream(json
);
// nullptr is passed for the error-string output, so no parser diagnostics are
// collected; only the boolean result is kept.
27 bool success
= Json::parseFromStream(crb
, jsonStream
, &document
, nullptr);
// NOTE(review): the condition guarding this throw is not visible here;
// presumably it is taken only when `success` is false -- confirm.
30 throw WalkerException("Error parsing JSON");
// Fetch document["query"]["pages"]; each get() is given
// Json::Value::nullSingleton() as the default for an absent key.
33 auto allPages
= document
.get("query", Json::Value::nullSingleton())
34 .get("pages", Json::Value::nullSingleton());
// Handle every page entry of the response.
36 for(auto& onePage
: allPages
) {
37 //! get normalized title not necessary, "title" is already
38 std::string oneTitle
=
39 onePage
.get("title", Json::Value::nullSingleton()).asString();
41 //! \todo find a better solution than get-compare-add
42 auto wantedArticle
= CollectionUtils::get(articleCache
, oneTitle
);
// Not in the cache yet: create the article and register it.
44 if(wantedArticle
== nullptr) {
45 wantedArticle
= std::make_shared
<Article
>(oneTitle
);
46 CollectionUtils::add(articleCache
, wantedArticle
);
// Pages carrying a "missing" or "invalid" member are flagged as marked and
// analyzed.
// NOTE(review): what follows this branch (presumably skipping link handling
// for such pages) is not visible here -- confirm.
49 if(onePage
.isMember("missing") || onePage
.isMember("invalid")) {
50 wantedArticle
->marked(true);
51 wantedArticle
->analyzed(true);
56 //! \todo support linkshere
57 std::shared_ptr
<Article
> par
;
// Walk the page's "links" member and resolve each linked title through the
// cache.
58 for(const auto& linked
:
59 onePage
.get("links", Json::Value::nullSingleton())) {
60 auto linkedPageTitle
=
61 linked
.get("title", Json::Value::nullSingleton()).asString();
62 par
= CollectionUtils::get(articleCache
, linkedPageTitle
);
// NOTE(review): the guard for the creation below is not visible; presumably
// it runs only when the lookup above returned nullptr -- confirm.
65 par
= std::make_shared
<Article
>(linkedPageTitle
);
66 CollectionUtils::add(articleCache
, par
);
// Record the link from the current page's article to the linked article.
69 wantedArticle
->addLink(par
);
// Flag the page's article as analyzed.
72 wantedArticle
->analyzed(true);
77 // always clear, otherwise insert won't happen
78 continuationData_
.clear();
// A top-level "continue" member is asserted to be a JSON object; each of its
// members is stored in continuationData_ as (member name, value as string).
// NOTE(review): the declaration of `contData` and the place where `moreData`
// is set are not visible; presumably `contData` is initialised from the
// document.get("continue", ...) call below -- confirm.
80 if(document
.isMember("continue")) {
83 document
.get("continue", Json::Value::nullSingleton());
84 assert(contData
.isObject());
86 for(auto it
= contData
.begin(); it
!= contData
.end(); it
++) {
87 continuationData_
.emplace(it
.name(), it
->asString());
// Report whether more data is needed, based on `moreData`.
93 return moreData
? ContinuationStatus::ConversionNeedsMoreData
94 : ContinuationStatus::ConversionCompleted
;
96 } // namespace WikiWalker