2 /// \file xmlcompactor.cc
3 /// Compact an XML file into a map of pretty xpaths and content
7 Copyright (C) 2010-2013, Chris Frey <cdfrey@foursquare.net>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 See the GNU General Public License in the COPYING file at the
19 root directory of this project for more details.
22 #include "xmlcompactor.h"
// Stream-insertion operator for XmlCompactor: takes the output stream `os`
// and the compactor `parser`, and returns a std::ostream reference.
// NOTE(review): the body is not visible in this excerpt; presumably it
// forwards to XmlCompactor::Dump(os) and returns os -- confirm.
28 std::ostream
& operator<<(std::ostream
&os
, XmlCompactor
&parser
)
// Default constructor.
// NOTE(review): neither an initializer list nor a body is visible in this
// excerpt -- confirm that it is intentionally empty.
34 XmlCompactor::XmlCompactor()
38 // ugly hack to pretty up the output
// Works on a copy `p` of `path` and applies these cosmetic steps in order:
//   1. locate a /text() component that ends the path,
//   2. drop m_skip_prefix when it is non-empty and found at offset 0,
//   3. (leading slash removal, per the comment below),
//   4. widen single digit indexes [0]..[9] to [00]..[09].
// NOTE(review): the statements acting on steps 1 and 3, and the final
// return, are not visible in this excerpt -- presumably `p` is truncated
// at `pos`, a leading slash is erased, and `p` is returned; confirm.
39 Glib::ustring
XmlCompactor::HackPath(const Glib::ustring
&path
)
// Parallel tables: bad[i] is the text searched for, good[i] is the
// zero-padded text that replaces it in the loop at the end.
41 const char *bad
[] = { "[0]", "[1]", "[2]", "[3]", "[4]",
42 "[5]", "[6]", "[7]", "[8]", "[9]" };
43 const char *good
[] = { "[00]", "[01]", "[02]", "[03]", "[04]",
44 "[05]", "[06]", "[07]", "[08]", "[09]" };
46 Glib::ustring p
= path
;
48 // strip off the ending "text()"
49 size_t pos
= p
.rfind("/text()");
// 7 is the length of the searched text, so the test below only holds
// when the last match sits exactly at the end of p.
50 if( pos
!= Glib::ustring::npos
&& (pos
+ 7) == p
.size() )
53 // remove leading name if possible
54 if( m_skip_prefix
.size() && p
.find(m_skip_prefix
) == 0 )
55 p
.replace(0, m_skip_prefix
.size(), "");
57 // remove leading slash
61 // convert single digit offsets to two
62 for( int i
= 0; i
< 10; i
++ ) {
// Repeat until no occurrence of bad[i] is left; 3 is the length of every
// bad[] entry. The replacement never contains bad[i] again, so this ends.
64 while( (pos
= p
.find(bad
[i
])) != Glib::ustring::npos
) {
65 p
.replace(pos
, 3, good
[i
]);
// Recursive walk over `node` and all of its children.
// `handler` is a pointer to an XmlCompactor member function and is invoked
// through this->* on content nodes that are not whitespace.
// Returns false as soon as the handler or a recursive call returns false.
// NOTE(review): `content` comes from a dynamic_cast and is dereferenced
// below; a null check is presumably on a line not shown in this excerpt,
// as is the final return (presumably true) -- confirm both.
72 bool XmlCompactor::WalkNodes(xmlpp::Node
*node
, content_handler handler
)
74 xmlpp::ContentNode
*content
= dynamic_cast<xmlpp::ContentNode
*>(node
);
76 if( content
->is_white_space() )
77 return true; // skip whitespace between content
78 if( !(this->*handler
)(content
) )
79 return false; // handler had a problem, stop processing
// Descend into every child, using the same handler.
82 xmlpp::Node::NodeList list
= node
->get_children();
83 xmlpp::Node::NodeList::iterator i
= list
.begin();
84 for( ; i
!= list
.end(); ++i
) {
85 if( !WalkNodes(*i
, handler
) )
86 return false; // pass the "stop processing" msg down
// Content handler passed to WalkNodes() by Map().
// Stores the content of the node into *this via operator[], keyed by the
// node path after it has been cleaned up by HackPath().
// NOTE(review): the return statement is not visible in this excerpt;
// presumably it always returns true so the walk continues -- confirm.
91 bool XmlCompactor::DoMap(xmlpp::ContentNode
*content
)
93 (*this)[HackPath(content
->get_path())] = content
->get_content();
// Content handler passed to WalkNodes() by FindCommonPrefix().
// Narrows m_common_prefix to the leading characters it shares with the
// path of this node.
// NOTE(review): the declaration of `max`, the loop that advances it, and
// the return statements are not visible in this excerpt. Presumably `max`
// counts matching leading characters (at most `len`), and the no-prefix
// case returns false to stop the walk -- confirm.
97 bool XmlCompactor::ComparePrefix(xmlpp::ContentNode
*content
)
99 Glib::ustring path
= content
->get_path();
// An empty m_common_prefix is seeded with the current path.
101 if( m_common_prefix
.size() == 0 ) {
102 m_common_prefix
= path
;
105 // find max length of matching strings
106 size_t len
= min(m_common_prefix
.size(), path
.size());
// Character by character comparison at index `max`.
109 if( m_common_prefix
[max
] != path
[max
] )
115 // there's no prefix available
116 m_common_prefix
.clear();
120 // snag the largest prefix!
121 m_common_prefix
= m_common_prefix
.substr(0, max
);
// Resets m_common_prefix, walks the document starting at its root node
// with ComparePrefix as the handler, and returns the resulting
// m_common_prefix. The return value of WalkNodes() is ignored here.
// NOTE(review): `root` is passed to WalkNodes() without a null check;
// presumably a document must have been parsed first -- confirm.
127 Glib::ustring
XmlCompactor::FindCommonPrefix()
129 m_common_prefix
.clear();
130 xmlpp::Node
*root
= get_document()->get_root_node();
131 WalkNodes(root
, &XmlCompactor::ComparePrefix
);
132 return m_common_prefix
;
// Fills *this from the parsed document.
// `skip` is stored in m_skip_prefix, which HackPath() removes from the
// front of each path; the document is then walked from its root node with
// DoMap as the handler. The return value of WalkNodes() is ignored here.
// NOTE(review): `root` is passed to WalkNodes() without a null check;
// presumably a document must have been parsed first -- confirm.
135 void XmlCompactor::Map(const Glib::ustring
&skip
)
137 m_skip_prefix
= skip
;
138 xmlpp::Node
*root
= get_document()->get_root_node();
139 WalkNodes(root
, &XmlCompactor::DoMap
);
// Looks up `key` with find() and returns a Glib::ustring.
// NOTE(review): the handling of the lookup result and the return are not
// visible in this excerpt; presumably the mapped value is returned and a
// missing key yields an empty string -- confirm.
142 Glib::ustring
XmlCompactor::Value(const Glib::ustring
&key
)
144 iterator i
= find(key
);
// Evaluates `xpath` against the root node of the document and collects the
// content of every matching node that is a ContentNode and is not
// whitespace.
// NOTE(review): the declaration of `content` and the return statement are
// not visible in this excerpt; presumably `content` is a content_set that
// is returned -- confirm.
150 XmlCompactor::content_set
XmlCompactor::Find(const Glib::ustring
&xpath
) const
152 using namespace xmlpp
;
156 Node
*root
= get_document()->get_root_node();
157 NodeSet nodes
= root
->find(xpath
);
159 NodeSet::iterator ni
= nodes
.begin();
160 for( ; ni
!= nodes
.end(); ++ni
) {
// Matches that are not ContentNode yield a null `cn` and are skipped.
161 ContentNode
*cn
= dynamic_cast<ContentNode
*> (*ni
);
163 if( cn
&& !cn
->is_white_space() ) {
164 content
.push_back(cn
->get_content());
// Writes every entry of *this to `os`, one per line, as
//   <key>: <value>
// Both key and value are written through raw().
171 void XmlCompactor::Dump(std::ostream
&os
) const
173 for( const_iterator i
= begin(); i
!= end(); ++i
) {
174 os
<< i
->first
.raw() << ": " << i
->second
.raw() << "\n";
// Command line driver. Visible steps:
//   - parse XML from cin,
//   - print the common path prefix to cerr,
//   - build the map, using argv[1] as the skip prefix when given (else an
//     empty string), and print it to cout,
//   - treat each remaining argument (argv[2] onward) as an XPath and
//     print the number of matches followed by each matching value.
// Glib::ConvertError and std::exception are caught and their what() text
// is written to cerr.
// NOTE(review): the declaration of `parser`, the try statement and the
// return values are not visible in this excerpt -- confirm.
180 int main(int argc
, char *argv
[])
187 parser
.parse_stream(cin
);
188 cerr
<< "Common prefix: " << parser
.FindCommonPrefix() << endl
;
189 parser
.Map(argc
>= 2 ? argv
[1] : "");
190 cout
<< parser
<< endl
;
// Index 2 onward: argv[1] was already consumed as the skip prefix.
193 for( int i
= 2; i
< argc
; i
++ ) {
194 XmlCompactor::content_set content
= parser
.Find(argv
[i
]);
195 cout
<< "XPath: " << argv
[i
] << endl
;
196 cout
<< "Found " << content
.size() << " values" << endl
;
198 XmlCompactor::content_set::iterator ci
= content
.begin();
199 for( ; ci
!= content
.end(); ++ci
) {
200 cout
<< " " << (*ci
) << endl
;
205 catch( Glib::ConvertError
&e
) {
206 cerr
<< e
.what() << endl
;
209 catch( std::exception
&e
) {
210 cerr
<< e
.what() << endl
;