if branch for links json data
[dueringa_WikiWalker.git] / src / main.cpp
blob183dcf9858579657d491a10d9973e72d21b7a4da
1 //! \file main.cpp
3 #include <fstream>
4 #include <iostream>
5 #include <string>
7 #include "Article.h"
8 #include "ArticleCollection.h"
9 #include "ToGraphvizWriter.h"
10 #include "WikiWalker.h"
11 #include "version.h"
13 #include "config.h"
15 using CmdOpt = WikiWalker::CommandLineParserBase::CommandLineOptions;
17 //! limit for printing article links / contents
18 const int printLimit = 10;
20 int main(int argc, char** argv)
22 #if defined(WW_USE_BOOST_PO)
23 WikiWalker::BoostPoCommandLineParser cmdp;
24 #elif defined(WW_USE_GETOPT)
25 WikiWalker::GetoptCommandLineParser cmdp;
26 #endif
28 try {
29 cmdp.parse(argc, argv);
30 } catch(std::exception& e) {
31 std::cerr << std::endl << e.what() << std::endl;
32 cmdp.printHelp();
33 return -1;
36 if(cmdp.hasSet(CmdOpt::Version)) {
37 std::cout << "WikiWalker, version " << _WW_VERSION << std::endl;
38 return 0;
41 if(cmdp.hasSet(CmdOpt::Help)) {
42 cmdp.printHelp();
43 return 0;
46 bool isUrlSet = cmdp.hasSet(CmdOpt::URL);
47 bool isCacheSet = cmdp.hasSet(CmdOpt::JsonCache);
48 bool isDotSet = cmdp.hasSet(CmdOpt::DotOut);
49 bool isDeepSet = cmdp.hasSet(CmdOpt::FetchDeep);
50 bool validRunConfig = isUrlSet || (isDotSet && isCacheSet);
52 if(!validRunConfig) {
53 std::cerr << "Must either specify at least URL, "
54 << "or dot and cache file." << std::endl;
55 cmdp.printHelp();
56 return 1;
59 if(isUrlSet && isDeepSet && !isCacheSet) {
60 std::cerr << "Please specify a cache file when using \"deep\" option"
61 << std::endl;
62 cmdp.printHelp();
63 return 1;
66 bool read_failed = false;
67 WikiWalker::WikiWalker w;
69 if(isCacheSet) {
70 try {
71 std::string cachefile = cmdp.getValue(CmdOpt::JsonCache);
72 w.readCache(cachefile);
73 } catch(std::exception& e) {
74 std::cout << e.what() << std::endl;
75 read_failed = true;
79 if(isDeepSet) {
80 w.deep(true);
83 if(isUrlSet) {
84 try {
85 std::string url = cmdp.getValue(CmdOpt::URL);
86 w.skipSslVerification(
87 cmdp.hasSet(WikiWalker::CommandLineParserBase::CommandLineOptions::
88 SkipSslVerification));
89 w.start(url);
90 } catch(std::exception& e) {
91 std::cout << "Error: " << e.what() << std::endl;
92 return -1;
96 if(isCacheSet) {
97 std::string cachefile = cmdp.getValue(CmdOpt::JsonCache);
98 if(read_failed) {
99 cachefile.append("_");
100 std::cout << "Reading from cache failed, write to " << cachefile
101 << std::endl;
103 try {
104 w.writeCache(cachefile);
105 } catch(std::exception& e) {
106 std::cout << "Error: " << e.what() << std::endl;
110 if(isDotSet) {
111 const WikiWalker::CollectionUtils::ArticleCollection& ac = w.collection();
112 std::string outfile = cmdp.getValue(CmdOpt::DotOut);
113 WikiWalker::ToGraphvizWriter tgw;
114 std::ofstream file(outfile, std::ios::trunc | std::ios::out);
116 if(file.fail()) {
117 std::cerr << "Error opening dot out file for writing" << std::endl;
118 } else {
119 tgw.output(ac, file);
120 file.flush();
122 if(file.bad() || file.fail()) {
123 std::cerr << "Error during writing dot out file." << std::endl;
126 file.close();
130 size_t numArt =
131 WikiWalker::CollectionUtils::countAnalyzedArticles(w.collection());
132 if(numArt > 10) {
133 std::cout << "There are " << numArt << " analyzed articles."
134 << " Not printing them. (Limit: " << printLimit << ")."
135 << std::endl;
136 } else {
137 for(auto& a : w.collection()) {
138 auto& art = a.second;
139 if(art->marked()) {
140 std::cout << "Article " << a.first << " is invalid or doesn't exist"
141 << std::endl;
142 } else if(art->analyzed()) {
143 std::cout << "Article " << a.first << " has " << art->countLinks()
144 << " links" << std::endl;
149 return 0;