Fix - allow port to be specified in URL
[dueringa_WikiWalker.git] / util / FilterAnalyzedArticles.cpp
blobe4344aa490cf52cca1a11e9b509d3d078b8cea42
1 #include <algorithm>
2 #include <iostream>
3 #include <fstream>
4 #include <vector>
6 #include "Article.h"
7 #include "ArticleCollection.h"
8 #include "JsonSerializer.h"
9 #include "ToGraphvizWriter.h"
12 int main(int argc, char** argv)
14 if(argc != 3) {
15 std::cout << argv[0] << " "
16 << "<cache file>"
17 << "<dot file>" << std::endl;
18 return 1;
21 WikiWalker::JsonSerializer jser;
22 std::ifstream cache(argv[1]);
24 // assumption: having write-only access to a file is so rare that I don't care
25 // also, currently the file is used for both read and write, so initially it
26 // won't exist.
27 if(!cache.is_open()) {
28 std::cerr << "Couldn't open cache file" << std::endl;
29 return 1;
32 WikiWalker::CollectionUtils::ArticleCollection ac;
33 jser.deserialize(ac, cache);
35 if(cache.fail()) {
36 cache.close();
37 std::cerr << "Error reading from file" << std::endl;
38 return 1;
41 WikiWalker::CollectionUtils::ArticleCollection filteredColl;
42 for(auto& anArticle : ac) {
43 auto art = anArticle.second;
44 if(art->analyzed()) {
45 WikiWalker::CollectionUtils::add(filteredColl, art);
49 WikiWalker::ToGraphvizWriter g;
50 std::ofstream filteredGraph(argv[2]);
52 if(!filteredGraph.is_open()) {
53 std::cerr << "Couldn't open dot file for writing" << std::endl;
54 return 1;
57 g.output(filteredColl, filteredGraph);
59 if(filteredGraph.fail()) {
60 filteredGraph.close();
61 std::cerr << "Error writing to file" << std::endl;
62 return 1;
65 filteredGraph.close();
67 std::cout << "There are " << filteredColl.size() << " analyzed articles"
68 << std::endl;
70 return 0;