Support conversion of linkshere
[dueringa_WikiWalker.git] / src / JsonSerializer.cpp
blob207578450da221cc0a94bf702631865daf8a223b
1 //! \file
3 #include <json/json.h>
5 #include "Article.h"
6 #include "JsonSerializer.h"
7 #include "WalkerException.h"
9 namespace WikiWalker
//! Constants describing the envelope of the serialized JSON document.
namespace JsonSerializerInformation
{
  //! Key under which the scheme version is stored.
  constexpr const char* SchemeVersionName = "scheme-version";
  //! Scheme version that is written on serialization and required on
  //! deserialization. A compile-time constant: it must never change at
  //! runtime, so it is not a mutable global.
  constexpr int SchemeVersion = 2;

  //! Key under which the name of the producing program is stored.
  constexpr const char* ProgramKeyName = "program";
  //! Program name that is written and required when reading.
  constexpr const char* ProgramValue = "wikiwalker";

  //! Key under which the article collection object is stored.
  constexpr const char* CollectionKey = "ArticleCollection";
}  // namespace JsonSerializerInformation
22 /*! Get article links in an array.
23 * Basically undoing the Wikipedia to article conversion...
24 * \param article pointer to article which links should be extracted
25 * \return Json::Value array with titles as string
26 * \todo should / could be a member function, but then I'd have to expose
27 * Json::Value, which is ugly and clutters up other classes...
29 static Json::Value getArticleLinks(const Article* article)
31 Json::Value array(Json::ValueType::arrayValue);
33 for(auto ali = article->linkBegin(); ali != article->linkEnd(); ali++) {
34 auto a = ali->lock();
35 if(a != nullptr) {
36 std::string tit = a->title();
37 array.append(Json::Value(tit));
39 // nullptrs in articles will simply be *omitted*
40 // else {
41 // // do nothing!
42 // }
45 return array;
48 /*! Convert article to string representation of JSON representation.
49 * \param ac reference to articlecollection to be converted
50 * \return json as string
51 * \internal uses the following format:
52 * {"title":{"forward_links":[...]}, "title2":{"forward_links":[...]}, ...}
54 static std::string convertToJson(const CollectionUtils::ArticleCollection& ac)
56 Json::Value header(Json::ValueType::objectValue);
57 header[JsonSerializerInformation::ProgramKeyName] =
58 JsonSerializerInformation::ProgramValue;
59 header[JsonSerializerInformation::SchemeVersionName] =
60 JsonSerializerInformation::SchemeVersion;
62 Json::Value val(Json::ValueType::objectValue);
64 for(auto ar : ac) {
65 Json::Value linkObj(Json::ValueType::objectValue);
67 if(ar.second->analyzed()) {
68 linkObj["forward_links"] = getArticleLinks(ar.second.get());
69 } else {
70 linkObj["forward_links"] = Json::Value::nullSingleton();
73 val[ar.first] = linkObj;
75 header[JsonSerializerInformation::CollectionKey] = val;
77 Json::StreamWriterBuilder swb;
78 swb["indentation"] = "";
79 return Json::writeString(swb, header);
82 void JsonSerializer::serialize(
83 const CollectionUtils::ArticleCollection& collection,
84 std::ostream& outstream)
86 outstream << convertToJson(collection);
89 CollectionUtils::ArticleCollection JsonSerializer::deserialize(
90 std::istream& instream)
92 CollectionUtils::ArticleCollection ac;
93 deserialize(ac, instream);
94 return ac;
97 void JsonSerializer::deserialize(
98 CollectionUtils::ArticleCollection& collection,
99 std::istream& instream)
101 Json::Value document;
102 Json::CharReaderBuilder crb;
103 Json::CharReaderBuilder::strictMode(&crb.settings_);
104 bool success = Json::parseFromStream(crb, instream, &document, nullptr);
106 if(!success) {
107 throw WalkerException("Error parsing JSON");
110 if(!document.isObject()) {
111 throw WalkerException("Error: Json root is not an object");
115 auto programName = document.get(JsonSerializerInformation::ProgramKeyName,
116 Json::Value::nullSingleton());
117 if(programName.isNull() || !programName.isString() ||
118 programName.asString() != JsonSerializerInformation::ProgramValue) {
119 throw WalkerException("Error: Wrong program name");
123 auto schemeVersion =
124 document.get(JsonSerializerInformation::SchemeVersionName,
125 Json::Value::nullSingleton());
126 if(schemeVersion.isNull() || !schemeVersion.isNumeric() ||
127 schemeVersion.asLargestUInt() !=
128 JsonSerializerInformation::SchemeVersion) {
129 throw WalkerException("Error: Wrong scheme version");
133 auto coll = document.get(JsonSerializerInformation::CollectionKey,
134 Json::Value::nullSingleton());
135 if(coll.isNull() || !coll.isObject()) {
136 throw WalkerException("Error: collection is not serialized correctly");
139 // get all "main" articles first
140 for(auto& titleElement : coll.getMemberNames()) {
141 std::string title = titleElement;
143 //! \todo find a better solution than get-compare-add
144 auto a = CollectionUtils::get(collection, title);
146 if(a == nullptr) {
147 a = std::make_shared<Article>(title);
148 CollectionUtils::add(collection, a);
151 auto links = coll.get(title, Json::Value::nullSingleton())
152 .get("forward_links", Json::Value::nullSingleton());
154 if(links.isNull()) {
155 /* don't need to set article analyzed to false,
156 * since that's the default */
157 continue;
160 a->analyzed(true);
162 for(const auto& linkedArticle : links) {
163 std::string linkedTitle = linkedArticle.asString();
164 std::shared_ptr<Article> la =
165 CollectionUtils::get(collection, linkedTitle);
167 if(la == nullptr) {
168 la = std::make_shared<Article>(linkedTitle);
169 CollectionUtils::add(collection, la);
172 a->addLink(la);
176 } // namespace WikiWalker