2 // FilterAbiword.cs : Trivial implementation of an AbiWord-document filter.
4 // Author: Veerapuram Varadhan <vvaradhan@novell.com>
6 // Copyright (C) 2004 Novell, Inc.
10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
17 // The above copyright notice and this permission notice shall be included in
18 // all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 // DEALINGS IN THE SOFTWARE.
30 using System
.Collections
;
38 using ICSharpCode
.SharpZipLib
.GZip
;
40 namespace Beagle
.Filters
{
42 public class FilterAbiWord
: Beagle
.Daemon
.Filter
{
// Constructs the filter and registers the "application/x-abiword" MIME
// type as a supported flavor through AddSupportedFlavor.
46 public FilterAbiWord ()
48 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/x-abiword"));
52 // Process the <styles> ... </styles> nodes.
// Inspects the nodes nested below the element the reader is positioned on
// (nesting is tracked by comparing reader.Depth with the depth on entry).
// Every <s> element whose "name" attribute contains "head" or "note"
// (compared after ToLower()) has that style name recorded in hotStyles.
// An empty element (reader.IsEmptyElement) is not scanned at all.
53 void StudyStyleNode (XmlTextReader reader
)
55 string styleName
= null;
// Depth of the node we were called on; deeper nodes are its descendants.
56 int original_depth
= reader
.Depth
;
58 if (!reader
.IsEmptyElement
) {
60 while (reader
.Depth
> original_depth
) {
61 if (reader
.NodeType
== XmlNodeType
.Element
62 && reader
.Name
== "s") {
63 styleName
= reader
.GetAttribute ("name");
// Heading-like and note-like style names are treated as hot.
// NOTE(review): ToLower() without arguments is culture-sensitive;
// confirm that is acceptable for style names.
64 if (styleName
!= null &&
65 (styleName
.ToLower().IndexOf ("head") > -1 ||
66 styleName
.ToLower().IndexOf ("note") > -1))
// hotStyles is used as a set: the key is the style name, the value is always true.
67 hotStyles
[styleName
] = true;
74 // Process the props="blah:blah; blah:blah;" values
// Splits props on ';' into tokens, splits each token on ':' into a
// property name and value, and switches on the trimmed property name.
// The values tested for are "bold", "italic" and "underline" (the last
// one under the "text-decoration" case). The visible return statement
// yields true; the no-match path is not shown here — TODO confirm it
// returns false.
// NOTE(review): only the property name is Trim()med; the value
// (propAndValue[1]) is compared verbatim, so "font-weight: bold" with a
// space after ':' would not match — confirm the writer never emits that.
75 bool StudyPropsAttribute (string props
)
77 string[] propsTokens
= null;
78 string[] propAndValue
= null;
84 propsTokens
= props
.Split (';');
86 if (propsTokens
.Length
> 0) {
87 for (int i
= 0; i
< propsTokens
.Length
; i
++) {
// Each token is expected to look like "name:value".
89 propAndValue
= propsTokens
[i
].Split (':');
90 switch (propAndValue
[0].Trim()) {
92 if (propAndValue
[1] == "bold")
97 if (propAndValue
[1] == "italic")
101 case "text-decoration":
102 if (propAndValue
[1] == "underline")
107 return retVal
= true;
// Returns true only when nodeName is exactly "text:footnote-citation".
114 static bool NodeIsFreezing (String nodeName
)
116 return nodeName
== "text:footnote-citation";
// Returns true only when nodeName is exactly "p". WalkContentNodes uses
// this test before calling AppendStructuralBreak.
119 static bool NodeBreaksTextAfter (String nodeName
)
121 return nodeName
== "p";
// Stack of boxed bool "hot" flags: WalkContentNodes pushes one for an
// element and pops it (casting back to bool) on the EndElement case.
124 private Stack hot_nodes
= new Stack ();
// Consulted by WalkContentNodes together with the "section" node name;
// presumably true while the reader is inside a <section> — TODO confirm
// where it is set.
125 private bool inSection
= false;
127 // Walk through the <section> ... </section> nodes
128 // and extract the texts.
// Reads nodes from reader in a loop. A <styles> element is handed to
// StudyStyleNode; otherwise the node is dispatched on reader.NodeType
// (Element, Text and EndElement cases are visible). num_elements is
// compared against total_elements (10), presumably to bound the work
// done per call. DoPull tests the bool result; what true/false mean is
// not visible in this block — TODO confirm.
129 bool WalkContentNodes (XmlTextReader reader
)
131 // total number of elements to read per-pull
132 const int total_elements
= 10;
133 int num_elements
= 0;
134 while (reader
.Read ()) {
135 if (reader
.Name
== "styles" &&
136 reader
.NodeType
== XmlNodeType
.Element
) {
// Collect the hot style names declared under <styles>.
137 StudyStyleNode (reader
);
139 } else if (!inSection
&& reader
.Name
!= "section")
142 switch (reader
.NodeType
) {
143 case XmlNodeType
.Element
:
144 // A node/text is hot if:
145 // (1) It is flagged with a hot style (header, footer and
147 // (2) It contains "hot" styled attributes.
149 if (reader
.Name
== "section") {
// The section's "type" attribute is compared against "header"
// (further alternatives follow the || in the original).
150 string type
= reader
.GetAttribute ("type");
151 if (type
== "header" ||
156 } else if (reader
.IsEmptyElement
) {
// An empty <p/> still produces a structural break.
157 if (NodeBreaksTextAfter (reader
.Name
)) {
159 AppendStructuralBreak ();
164 // <c ....> text blah blah </c> overrides the
165 // formatting at the paragraph level.
166 if (reader
.Name
== "c") {
167 string val
= reader
.GetAttribute ("props");
// NOTE(review): GetAttribute returns null when "props" is absent;
// confirm StudyPropsAttribute tolerates a null argument.
168 isHot
= StudyPropsAttribute (val
);
169 //Console.WriteLine ("{0} is hot? {1}", val, isHot);
// Walk the element's attributes looking for a "style" attribute
// whose value is one of the recorded hot styles.
172 bool has_attr
= reader
.MoveToFirstAttribute ();
174 if (reader
.Name
== "style") {
175 if (hotStyles
.Contains (reader
.Value
))
179 has_attr
= reader
.MoveToNextAttribute ();
// Move the reader back from the attributes to the owning element.
182 reader
.MoveToElement();
// Remember this element's hot flag; the EndElement case pops it.
185 hot_nodes
.Push (isHot
);
190 if (NodeIsFreezing (reader
.Name
))
194 case XmlNodeType
.Text
:
195 string text
= reader
.Value
;
198 case XmlNodeType
.EndElement
:
// Closing a <p> ends a paragraph, so emit a structural break.
199 if (NodeBreaksTextAfter (reader
.Name
)) {
201 AppendStructuralBreak ();
// Retrieve the flag pushed when the matching element was opened.
204 bool is_hot
= (bool) hot_nodes
.Pop ();
208 if (NodeIsFreezing (reader
.Name
))
210 if (reader
.Name
== "section")
// Per-call element budget check (see total_elements above).
215 if (num_elements
>= total_elements
) {
// Reads through reader looking for the <metadata> element and records
// its depth. While the found flag is set, <m> elements deeper than that
// depth are examined through their "key" attribute, and reader.Value is
// added as a Beagle property via AddProperty. Key-to-property pairs
// visible here include:
//   "abiword.generator" -> "fixme:appname"
//   "dc.description"    -> "dc:description"
//   "abiword.keywords"  -> "fixme:keywords"
//   "dc.contributor"    -> "dc:contributor"
// The remaining AddProperty calls ("dc:relation", "dc:rights",
// "dc:source", "dc:subject", "dc:creator", "dc:coverage", "dc:type",
// "dc:language", "dc:title", "dc:publisher") presumably sit under the
// matching "dc.*" keys — TODO confirm against the case labels.
222 private void ExtractMetadata (XmlTextReader reader
)
228 while (reader
.Read()) {
// Opening <metadata> element: remember how deep it is nested.
229 if (!found
&& reader
.Name
== "metadata" && reader
.NodeType
== XmlNodeType
.Element
) {
231 depth
= reader
.Depth
;
// Closing </metadata> element.
235 if (found
&& reader
.Name
== "metadata" && reader
.NodeType
== XmlNodeType
.EndElement
)
// An <m key="..."> entry nested inside <metadata>.
238 if (found
&& reader
.Name
== "m" && reader
.Depth
> depth
) {
239 key
= reader
.GetAttribute ("key");
241 case "abiword.generator":
243 AddProperty (Beagle
.Property
.New ("fixme:appname", reader
.Value
));
246 case "dc.description":
248 AddProperty (Beagle
.Property
.New ("dc:description", reader
.Value
));
251 case "abiword.keywords":
253 AddProperty (Beagle
.Property
.New ("fixme:keywords", reader
.Value
));
258 AddProperty (Beagle
.Property
.New ("dc:relation", reader
.Value
));
263 AddProperty (Beagle
.Property
.New ("dc:rights", reader
.Value
));
268 AddProperty (Beagle
.Property
.New ("dc:source", reader
.Value
));
271 case "dc.contributor":
273 AddProperty (Beagle
.Property
.New ("dc:contributor", reader
.Value
));
278 AddProperty (Beagle
.Property
.New ("dc:subject", reader
.Value
));
283 AddProperty (Beagle
.Property
.New ("dc:creator", reader
.Value
));
288 AddProperty (Beagle
.Property
.New ("dc:coverage", reader
.Value
));
293 AddProperty (Beagle
.Property
.New ("dc:type", reader
.Value
));
298 AddProperty (Beagle
.Property
.New ("dc:language", reader
.Value
));
303 AddProperty (Beagle
.Property
.New ("dc:title", reader
.Value
));
308 AddProperty (Beagle
.Property
.New ("dc:publisher", reader
.Value
));
// Content reader: created in DoOpen and passed to WalkContentNodes by
// DoPull. Null until DoOpen has run.
315 XmlTextReader reader
= null;
// Resets per-document state: creates a fresh hotStyles Hashtable and
// opens an XmlTextReader on info.FullName, stored in the reader field.
316 override protected void DoOpen (FileInfo info
)
318 hotStyles
= new Hashtable ();
319 reader
= new XmlTextReader (info
.FullName
);
// Opens a separate XmlTextReader on FileInfo.FullName and passes it to
// ExtractMetadata. Any exception is caught and reported through
// Logger.Log.Error and Logger.Log.Debug rather than propagated.
// NOTE(review): no Close/Dispose of metaReader is visible in this block —
// confirm the reader is released.
322 override protected void DoPullProperties ()
324 XmlTextReader metaReader
= new XmlTextReader (FileInfo
.FullName
);
326 ExtractMetadata (metaReader
);
328 } catch (Exception e
) {
331 Logger
.Log
.Error ("Exception occurred while reading meta-data from {0}",
333 Logger
.Log
.Debug (e
);
// Checks the reader field for null, then calls WalkContentNodes on it
// and branches on the bool result. Any exception is caught and reported
// through Logger.Log.Error and Logger.Log.Debug rather than propagated.
337 override protected void DoPull ()
339 if (reader
== null) {
344 if (WalkContentNodes (reader
)) {
348 } catch (Exception e
) {
351 Logger
.Log
.Error ("Exception occurred while reading contents from {0}",
353 Logger
.Log
.Debug (e
);