1 //! \file WikimediaJsonToArticleConverter.cpp
3 #include "WikimediaJsonToArticleConverter.h"
4 #include "WalkerException.h"
9 //! \todo Passing the ArticleCollection instance in as a parameter is an ugly workaround; find a cleaner way to give the converter access to the article cache.
10 Article
* WikimediaJsonToArticleConverter::convertToArticle(std::string json
,
11 ArticleCollection
& articleCache
)
15 bool success
= reader
.parse(json
, document
, false);
18 throw WalkerException("Error parsing JSON");
21 auto allPages
= document
.get("query", Json::Value::nullSingleton())
22 .get("pages", Json::Value::nullSingleton());
24 // only get first page
25 auto wantedPage
= allPages
.get(allPages
.getMemberNames()[0],
26 Json::Value::nullSingleton());
28 if(wantedPage
.isMember("missing")) {
29 throw WalkerException("Article doesn't exist");
30 } else if(wantedPage
.isMember("invalid")) {
31 throw WalkerException("Invalid article");
34 //! get normalized title not necessary, "title" is already
35 std::string wantedArticleTitle
= wantedPage
.get("title",
36 Json::Value::nullSingleton()).asString();
37 Article
* wantedArticle
= articleCache
.get(wantedArticleTitle
);
39 if(wantedArticle
== nullptr) {
40 wantedArticle
= new Article(wantedArticleTitle
);
43 articleCache
.add(wantedArticle
);
46 for(const auto& linked
: wantedPage
.get("links", Json::Value::nullSingleton())) {
47 auto linkedPageTitle
= linked
.get("title", Json::Value::nullSingleton()).asString();
48 auto par
= articleCache
.get(linkedPageTitle
);
51 par
= new Article(linkedPageTitle
);
52 articleCache
.add(par
);
55 wantedArticle
->addLink(par
);
58 wantedArticle
->setAnalyzed(true);
60 if(!document
.isMember("batchcomplete")) {
63 document
.get("continue", Json::Value::nullSingleton())
64 .get("plcontinue", Json::Value::nullSingleton())