8 #include "ArticleCollection.h"
9 #include "ToGraphvizWriter.h"
10 #include "WikiWalker.h"
15 using CmdOpt
= WikiWalker::CommandLineParserBase::CommandLineOptions
;
//! Limit for printing article links / contents.
//! The value is echoed to the user as "(Limit: N)" when the article
//! listing is suppressed.
constexpr int printLimit = 10;
20 int main(int argc
, char** argv
)
22 #if defined(WW_USE_BOOST_PO)
23 WikiWalker::BoostPoCommandLineParser cmdp
;
24 #elif defined(WW_USE_GETOPT)
25 WikiWalker::GetoptCommandLineParser cmdp
;
29 cmdp
.parse(argc
, argv
);
30 } catch(std::exception
& e
) {
31 std::cerr
<< std::endl
<< e
.what() << std::endl
;
36 if(cmdp
.hasSet(CmdOpt::Version
)) {
37 std::cout
<< "WikiWalker, version " << _WW_VERSION
<< std::endl
;
41 if(cmdp
.hasSet(CmdOpt::Help
)) {
46 bool isUrlSet
= cmdp
.hasSet(CmdOpt::URL
);
47 bool isCacheSet
= cmdp
.hasSet(CmdOpt::JsonCache
);
48 bool isDotSet
= cmdp
.hasSet(CmdOpt::DotOut
);
49 bool isDeepSet
= cmdp
.hasSet(CmdOpt::FetchDeep
);
50 bool validRunConfig
= isUrlSet
|| (isDotSet
&& isCacheSet
);
53 std::cerr
<< "Must either specify at least URL, "
54 << "or dot and cache file." << std::endl
;
59 if(isUrlSet
&& isDeepSet
&& !isCacheSet
) {
60 std::cerr
<< "Please specify a cache file when using \"deep\" option"
66 bool read_failed
= false;
67 WikiWalker::WikiWalker w
;
71 std::string cachefile
= cmdp
.getValue(CmdOpt::JsonCache
);
72 w
.readCache(cachefile
);
73 } catch(std::exception
& e
) {
74 std::cout
<< e
.what() << std::endl
;
85 std::string url
= cmdp
.getValue(CmdOpt::URL
);
86 w
.skipSslVerification(
87 cmdp
.hasSet(WikiWalker::CommandLineParserBase::CommandLineOptions::
88 SkipSslVerification
));
90 } catch(std::exception
& e
) {
91 std::cout
<< "Error: " << e
.what() << std::endl
;
97 std::string cachefile
= cmdp
.getValue(CmdOpt::JsonCache
);
99 cachefile
.append("_");
100 std::cout
<< "Reading from cache failed, write to " << cachefile
104 w
.writeCache(cachefile
);
105 } catch(std::exception
& e
) {
106 std::cout
<< "Error: " << e
.what() << std::endl
;
111 const WikiWalker::CollectionUtils::ArticleCollection
& ac
= w
.collection();
112 std::string outfile
= cmdp
.getValue(CmdOpt::DotOut
);
113 WikiWalker::ToGraphvizWriter tgw
;
114 std::ofstream
file(outfile
, std::ios::trunc
| std::ios::out
);
117 std::cerr
<< "Error opening dot out file for writing" << std::endl
;
119 tgw
.output(ac
, file
);
122 if(file
.bad() || file
.fail()) {
123 std::cerr
<< "Error during writing dot out file." << std::endl
;
131 WikiWalker::CollectionUtils::countAnalyzedArticles(w
.collection());
133 std::cout
<< "There are " << numArt
<< " analyzed articles."
134 << " Not printing them. (Limit: " << printLimit
<< ")."
137 for(auto& a
: w
.collection()) {
138 auto& art
= a
.second
;
140 std::cout
<< "Article " << a
.first
<< " is invalid or doesn't exist"
142 } else if(art
->analyzed()) {
143 std::cout
<< "Article " << a
.first
<< " has " << art
->countLinks()
144 << " links" << std::endl
;