2 // FilterAbiword.cs : Trivial implementation of an AbiWord-document filter.
4 // Author: Veerapuram Varadhan <vvaradhan@novell.com>
6 // Copyright (C) 2004 Novell, Inc.
10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
17 // The above copyright notice and this permission notice shall be included in
18 // all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 // DEALINGS IN THE SOFTWARE.
30 using System
.Collections
;
38 using ICSharpCode
.SharpZipLib
.GZip
;
40 namespace Beagle
.Filters
{
42 public class FilterAbiWord
: Beagle
.Daemon
.Filter
{
// Constructor. Builds a filter flavor from the MIME type
// "application/x-abiword" (FilterFlavor.NewFromMimeType) and passes it to
// AddSupportedFlavor.
47 public FilterAbiWord ()
49 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/x-abiword"));
53 // Process the <styles> ... </styles> nodes.
// Examines the element the reader is positioned on and records "hot" style
// names in hotStyles. Visible logic: while reader.Depth stays above the
// starting depth, each <s> element has its "name" attribute read; if that
// name contains "head" or "note" (compared after ToLower()), it is stored
// as a key in hotStyles with the value true.
// NOTE(review): the statements that advance the reader inside the loop are
// not visible in this view of the file -- confirm against the full source.
54 void StudyStyleNode (XmlTextReader reader
)
56 string styleName
= null;
// Depth of the element we started on; the loop below only runs while the
// reader is positioned deeper than this.
57 int original_depth
= reader
.Depth
;
// Nothing to scan when the starting element is empty.
59 if (!reader
.IsEmptyElement
) {
61 while (reader
.Depth
> original_depth
) {
// Only element nodes named "s" are inspected.
62 if (reader
.NodeType
== XmlNodeType
.Element
63 && reader
.Name
== "s") {
// The attribute value is null-checked below before use.
64 styleName
= reader
.GetAttribute ("name");
65 if (styleName
!= null &&
66 (styleName
.ToLower().IndexOf ("head") > -1 ||
67 styleName
.ToLower().IndexOf ("note") > -1))
// Remember this style name as hot; the stored value is always true.
68 hotStyles
[styleName
] = true;
75 // Process the props="blah:blah; blah:blah;" values
// Parses a props string of the form "name:value; name:value;" and inspects
// the individual properties. Visible checks: a value equal to "bold", a
// value equal to "italic", and (under the "text-decoration" case) a value
// equal to "underline".
// NOTE(review): several lines of this method are not visible in this view
// (the other case labels, the declaration of retVal and the final return),
// so the exact return contract should be confirmed against the full source.
76 bool StudyPropsAttribute (string props
)
78 string[] propsTokens
= null;
79 string[] propAndValue
= null;
// Properties are separated by ';'.
85 propsTokens
= props
.Split (';');
87 if (propsTokens
.Length
> 0) {
88 for (int i
= 0; i
< propsTokens
.Length
; i
++) {
// Each property is split on ':'; index 0 is the name, index 1 the value.
90 propAndValue
= propsTokens
[i
].Split (':');
// The property name is trimmed before being matched.
// NOTE(review): the value (index 1) is compared below without Trim(),
// unlike the name, and is indexed without checking that the token
// actually contained a ':' -- confirm this is intended.
91 switch (propAndValue
[0].Trim()) {
93 if (propAndValue
[1] == "bold")
98 if (propAndValue
[1] == "italic")
102 case "text-decoration":
103 if (propAndValue
[1] == "underline")
108 return retVal
= true;
// Returns true exactly when nodeName equals "text:footnote-citation".
115 static bool NodeIsFreezing (String nodeName
)
117 return nodeName
== "text:footnote-citation";
// Returns true exactly when nodeName equals "p"; WalkContentNodes uses this
// to decide whether to call AppendStructuralBreak.
120 static bool NodeBreaksTextAfter (String nodeName
)
122 return nodeName
== "p";
// Stack of "is hot" flags: WalkContentNodes pushes isHot in its Element case
// and pops a value, cast to bool, in its EndElement case.
125 private Stack hot_nodes
= new Stack ();
// Starts out false; WalkContentNodes tests it together with the node name
// "section".
126 private bool inSection
= false;
128 // Walk through the <section> ... </section> nodes
129 // and extract the texts.
// Reads nodes from reader and dispatches on node type (Element, Text,
// EndElement). <styles> elements are handed to StudyStyleNode; nodes for
// which NodeBreaksTextAfter is true lead to AppendStructuralBreak calls; an
// "is hot" flag is pushed on hot_nodes for elements and popped again at
// end elements. num_elements is compared against total_elements (10).
// NOTE(review): many lines of this method are not visible in this view,
// including the return statements and the place where num_elements is
// incremented, so the return contract cannot be stated from here.
130 bool WalkContentNodes (XmlTextReader reader
)
132 // total number of elements to read per-pull
133 const int total_elements
= 10;
134 int num_elements
= 0;
135 while (reader
.Read ()) {
// A <styles> start tag: let StudyStyleNode collect the hot style names.
136 if (reader
.Name
== "styles" &&
137 reader
.NodeType
== XmlNodeType
.Element
) {
138 StudyStyleNode (reader
);
// Not inside a section and the node is not named "section": this branch is
// taken (its body is not visible in this view).
140 } else if (!inSection
&& reader
.Name
!= "section")
143 switch (reader
.NodeType
) {
144 case XmlNodeType
.Element
:
145 // A node/text is hot if:
146 // (1) It is flagged with a hot style (header, footer and
// NOTE(review): the continuation of item (1) is missing from this view.
148 // (2) It contains "hot" styled attributes.
// For a <section> element its "type" attribute is inspected (compared with
// "header" and further values that are not visible here).
150 if (reader
.Name
== "section") {
151 string type
= reader
.GetAttribute ("type");
152 if (type
== "header" ||
// Empty element: emit a structural break if the node is one that breaks
// text.
157 } else if (reader
.IsEmptyElement
) {
158 if (NodeBreaksTextAfter (reader
.Name
)) {
160 AppendStructuralBreak ();
165 // <c ....> text blah blah </c> overrides the
166 // formatting at the paragraph level.
167 if (reader
.Name
== "c") {
168 string val
= reader
.GetAttribute ("props");
169 isHot
= StudyPropsAttribute (val
);
170 //Console.WriteLine ("{0} is hot? {1}", val, isHot);
// Walk the element's attributes looking for a "style" attribute whose value
// is a key in hotStyles.
173 bool has_attr
= reader
.MoveToFirstAttribute ();
175 if (reader
.Name
== "style") {
176 if (hotStyles
.Contains (reader
.Value
))
180 has_attr
= reader
.MoveToNextAttribute ();
// Move the reader from the attribute back to its owning element.
183 reader
.MoveToElement();
// Record this element's hot flag; a flag is popped again in the EndElement
// case below.
186 hot_nodes
.Push (isHot
);
191 if (NodeIsFreezing (reader
.Name
))
195 case XmlNodeType
.Text
:
196 string text
= reader
.Value
;
199 case XmlNodeType
.EndElement
:
200 if (NodeBreaksTextAfter (reader
.Name
)) {
202 AppendStructuralBreak ();
// Retrieve the flag that was pushed for the element being closed.
205 bool is_hot
= (bool) hot_nodes
.Pop ();
209 if (NodeIsFreezing (reader
.Name
))
211 if (reader
.Name
== "section")
// Per-call element budget check (see total_elements above).
216 if (num_elements
>= total_elements
) {
// Reads through the document looking for the <metadata> element and, for
// <m> nodes found at a greater depth than it, reads the "key" attribute and
// adds a Beagle property whose value is reader.Value.
// Key -> property mappings visible in this view:
//   "abiword.generator" -> "fixme:appname"
//   "dc.description"    -> "dc:description"
//   "abiword.keywords"  -> "fixme:keywords"
//   "dc.contributor"    -> "dc:contributor"
// NOTE(review): the remaining case labels, the declarations of found, depth
// and key, and any statements that advance the reader before reader.Value is
// used are not visible in this view -- confirm against the full source.
223 private void ExtractMetadata (XmlTextReader reader
)
229 while (reader
.Read()) {
// First <metadata> start tag: remember its depth so that children can be
// recognised by their greater depth.
230 if (!found
&& reader
.Name
== "metadata" && reader
.NodeType
== XmlNodeType
.Element
) {
232 depth
= reader
.Depth
;
// Closing </metadata> tag reached (the action taken is not visible here).
236 if (found
&& reader
.Name
== "metadata" && reader
.NodeType
== XmlNodeType
.EndElement
)
// An <m> node nested below <metadata>: dispatch on its "key" attribute.
239 if (found
&& reader
.Name
== "m" && reader
.Depth
> depth
) {
240 key
= reader
.GetAttribute ("key");
242 case "abiword.generator":
244 AddProperty (Beagle
.Property
.New ("fixme:appname", reader
.Value
));
247 case "dc.description":
249 AddProperty (Beagle
.Property
.New ("dc:description", reader
.Value
));
252 case "abiword.keywords":
254 AddProperty (Beagle
.Property
.New ("fixme:keywords", reader
.Value
));
259 AddProperty (Beagle
.Property
.New ("dc:relation", reader
.Value
));
264 AddProperty (Beagle
.Property
.New ("dc:rights", reader
.Value
));
269 AddProperty (Beagle
.Property
.New ("dc:source", reader
.Value
));
272 case "dc.contributor":
274 AddProperty (Beagle
.Property
.New ("dc:contributor", reader
.Value
));
279 AddProperty (Beagle
.Property
.New ("dc:subject", reader
.Value
));
284 AddProperty (Beagle
.Property
.New ("dc:creator", reader
.Value
));
289 AddProperty (Beagle
.Property
.New ("dc:coverage", reader
.Value
));
294 AddProperty (Beagle
.Property
.New ("dc:type", reader
.Value
));
299 AddProperty (Beagle
.Property
.New ("dc:language", reader
.Value
));
304 AddProperty (Beagle
.Property
.New ("dc:title", reader
.Value
));
309 AddProperty (Beagle
.Property
.New ("dc:publisher", reader
.Value
));
// Creates an XmlTextReader for the file at path. The visible code opens a
// FileStream on path, reassigns s to a GZipInputStream wrapping it, and
// returns an XmlTextReader constructed over s.
// NOTE(review): the declaration of s, the remaining FileStream arguments and
// any condition guarding the GZip wrapping are not visible in this view.
316 private XmlTextReader
BuildReader (string path
)
319 s
= new FileStream (path
,
325 s
= new GZipInputStream (s
);
327 return new XmlTextReader (s
);
// Reader used for content extraction: assigned in DoOpen via BuildReader and
// passed to WalkContentNodes in DoPull. Null until DoOpen assigns it.
330 XmlTextReader reader
= null;
// Opens the document described by info. The visible code opens a read-only,
// read-shared FileStream on info.FullName and wraps it in a GZipInputStream,
// with a catch for Exception; it then creates a fresh hotStyles Hashtable
// and builds the content reader from info.FullName via BuildReader.
// NOTE(review): the declarations of s and z, the body of the catch block and
// whatever is done with z are not visible in this view.
331 override protected void DoOpen (FileInfo info
)
333 // Try to open the file as if it is gzip.
334 // If that fails, we conclude that it must
335 // just be a regular text file full of xml.
339 s
= new FileStream (info
.FullName
, FileMode
.Open
, FileAccess
.Read
, FileShare
.Read
);
341 z
= new GZipInputStream (s
);
345 } catch (Exception ex
) {
// Start with an empty set of hot style names for this document.
349 hotStyles
= new Hashtable ();
350 reader
= BuildReader (info
.FullName
);
// Extracts document metadata: builds a separate reader over
// FileInfo.FullName with BuildReader and runs ExtractMetadata on it. An
// Exception is caught and reported through Logger.Log.Error and
// Logger.Log.Debug.
// NOTE(review): the remaining arguments of the Error call and any cleanup of
// metaReader are not visible in this view.
353 override protected void DoPullProperties ()
355 XmlTextReader metaReader
= BuildReader (FileInfo
.FullName
);
357 ExtractMetadata (metaReader
);
359 } catch (Exception e
) {
362 Logger
.Log
.Error ("Exception occurred while reading meta-data from {0}",
364 Logger
.Log
.Debug (e
);
368 override protected void DoPull ()
370 if (reader
== null) {
375 if (WalkContentNodes (reader
)) {
379 } catch (Exception e
) {
382 Logger
.Log
.Error ("Exception occurred while reading contents from {0}",
384 Logger
.Log
.Debug (e
);