src/WikimediaJsonToArticleConverter.cpp

   1 //! \file WikimediaJsonToArticleConverter.cpp
   2
   3 #include "WikimediaJsonToArticleConverter.h"
   4 #include "WalkerException.h"
   5 #include "Article.h"
   6
   7 #include <json/json.h>
   8
   9 //! \todo really ugly workaround, passing in the ArticleCollection instance... :/
  10 Article* WikimediaJsonToArticleConverter::convertToArticle(std::string json,
  11         ArticleCollection& articleCache)
  12 {
  13     Json::Reader reader;
  14     Json::Value document;
  15     bool success = reader.parse(json, document, false);
  16
  17     if(!success) {
  18         throw WalkerException("Error parsing JSON");
  19     }
  20
  21     auto allPages = document.get("query", Json::Value::nullSingleton())
  22                             .get("pages", Json::Value::nullSingleton());
  23
  24     // only get first page
  25     auto wantedPage = allPages.get(allPages.getMemberNames()[0],
  26                                    Json::Value::nullSingleton());
  27
  28     if(wantedPage.isMember("missing")) {
  29         throw WalkerException("Article doesn't exist");
  30     } else if(wantedPage.isMember("invalid")) {
  31         throw WalkerException("Invalid article");
  32     }
  33
  34     //! get normalized title not necessary, "title" is already
  35     std::string wantedArticleTitle = wantedPage.get("title",
  36                                      Json::Value::nullSingleton()).asString();
  37     Article* wantedArticle = articleCache.get(wantedArticleTitle);
  38
  39     if(wantedArticle == nullptr) {
  40         wantedArticle = new Article(wantedArticleTitle);
  41     }
  42
  43     articleCache.add(wantedArticle);
  44
  45     // add links
  46     for(const auto& linked : wantedPage.get("links", Json::Value::nullSingleton())) {
  47         auto linkedPageTitle = linked.get("title", Json::Value::nullSingleton()).asString();
  48         auto par = articleCache.get(linkedPageTitle);
  49
  50         if(par == nullptr) {
  51             par = new Article(linkedPageTitle);
  52             articleCache.add(par);
  53         }
  54
  55         wantedArticle->addLink(par);
  56     }
  57
  58     wantedArticle->setAnalyzed(true);
  59
  60     if(!document.isMember("batchcomplete")) {
  61         moreData = true;
  62         continueString =
  63           document.get("continue", Json::Value::nullSingleton())
  64                   .get("plcontinue", Json::Value::nullSingleton())
  65                     .asString();
  66     } else {
  67         moreData = false;
  68         continueString = "";
  69     }
  70
  71     return wantedArticle;
  72 }