if branch for links json data
[dueringa_WikiWalker.git] / src / WikiWalker.cpp
blob1c1388be3be29d50c46f0460852cd8542a051033
1 //! \file WikiWalker.cpp
3 #include "WikiWalker.h"
5 #include <cassert>
6 #include <fstream>
7 #include <iostream>
9 #include "LUrlParser.h"
11 #include "JsonSerializer.h"
12 #include "StringUtils.h"
13 #include "WalkerException.h"
14 #include "WikimediaApi.h"
16 // since the class is named like the namespace, this is a bit awkward...
18 namespace WikiWalker
20 void WikiWalker::start(const std::string& url)
22 auto info = WikimediaApiUtils::parseArticleUrl(url);
23 WikimediaApi wapi(info.apiBaseUrl);
25 wapi.fetchForwardLinks(info.articleTitle,
26 WikimediaApi::WikimediaGenerator::NoGenerator,
27 articleSet_);
29 if(fetchGenerator_) {
30 wapi.fetchForwardLinks(
31 info.articleTitle,
32 WikimediaApi::WikimediaGenerator::ForwardLinkGenerator,
33 articleSet_);
37 void WikiWalker::readCache(const std::string& cacheFile)
39 JsonSerializer jser;
40 std::ifstream cache(cacheFile);
42 // assumption: having write-only access to a file is so rare that I don't
43 // care also, currently the file is used for both read and write, so
44 // initially it won't exist.
45 if(!cache.is_open()) {
46 return;
49 jser.deserialize(articleSet_, cache);
51 // doesn't work anymore since jsoncpp class changed
52 // assert(cache.eof());
54 if(cache.fail()) {
55 cache.close();
56 throw WalkerException("Error reading from file");
60 void WikiWalker::writeCache(const std::string& cacheFile)
62 JsonSerializer w;
64 std::ofstream cache(cacheFile, std::ios::trunc);
66 if(!cache.is_open()) {
67 throw WalkerException("Error writing to cache file. Check permissions.");
70 w.serialize(articleSet_, cache);
72 if(cache.fail() || cache.bad()) {
73 cache.close();
74 throw WalkerException("I/O eception when writing to cache file");
77 cache.flush();
78 cache.close();
80 } // namespace WikiWalker