4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
37 using ICSharpCode
.SharpZipLib
.Zip
;
39 namespace Beagle
.Filters
{
41 public class FilterOpenOffice
: Beagle
.Daemon
.Filter
{
// Registers every MIME type handled by this filter: the OpenOffice.org 1.x
// (Sun XML) formats first, followed by the OASIS OpenDocument formats.
public FilterOpenOffice ()
{
	string [] supported_mime_types = new string [] {
		// OpenOffice.org 1.x (Sun XML) document types
		"application/vnd.sun.xml.writer",
		"application/vnd.sun.xml.writer.template",
		"application/vnd.sun.xml.calc",
		"application/vnd.sun.xml.calc.template",
		"application/vnd.sun.xml.impress",
		"application/vnd.sun.xml.impress.template",
		"application/vnd.sun.xml.draw",
		"application/vnd.sun.xml.draw.template",

		// OASIS OpenDocument types
		"application/vnd.oasis.opendocument.text",
		"application/vnd.oasis.opendocument.text-template",
		"application/vnd.oasis.opendocument.spreadsheet",
		"application/vnd.oasis.opendocument.spreadsheet-template",
		"application/vnd.oasis.opendocument.presentation",
		"application/vnd.oasis.opendocument.presentation-template",
		"application/vnd.oasis.opendocument.graphics",
		"application/vnd.oasis.opendocument.graphics-template"
	};

	// Flavors are registered in the same order as the table above.
	foreach (string mime_type in supported_mime_types)
		AddSupportedFlavor (FilterFlavor.NewFromMimeType (mime_type));
}
72 // Parse the "style" nodes and mark appropriate styles as *HOT*
73 // FIXME: Identify and ADD more *HOT* styles. ;)
// Examines one style element and, when the style qualifies as "hot",
// records its name in hotStyles. A style qualifies when any of these holds:
//   - its parent style name starts with "Heading";
//   - it is named "Footnote", "Endnote", "Header" or "Footer";
//   - its fo:font-weight is "bold";
//   - its fo:font-style is "italic";
//   - its style:text-underline is present and is not "none".
// reader: the XML reader currently positioned on the style element.
74 void StudyStyleNode (XmlReader reader
)
76 string style_name
= reader
.GetAttribute ("style:name");
77 string style_parent
= reader
.GetAttribute ("style:parent-style-name");
80 string underline
= null;
// Remember the depth of the style element so the scan below stays
// within its children.
82 int original_depth
= reader
.Depth
;
84 if (!reader
.IsEmptyElement
) {
86 while (reader
.Depth
> original_depth
) {
// Font properties live in "style:properties" (OpenOffice.org 1.x) or
// "style:text-properties" (OpenDocument).
87 if (reader
.NodeType
== XmlNodeType
.Element
88 && (reader
.Name
== "style:properties" ||
89 reader
.Name
== "style:text-properties")) { /* ODT changes */
// NOTE(review): weight and italic are assigned here, but their
// declarations are not visible in this excerpt.
90 weight
= reader
.GetAttribute ("fo:font-weight");
91 italic
= reader
.GetAttribute ("fo:font-style");
92 underline
= reader
.GetAttribute ("style:text-underline");
// Decide whether the style is hot (see the criteria listed above).
98 if ((style_parent
!= null && style_parent
.StartsWith("Heading"))
99 || (style_name
!= null && ((String
.Compare (style_name
, "Footnote") == 0)
100 || (String
.Compare (style_name
, "Endnote") == 0)
101 || (String
.Compare (style_name
, "Header") == 0)
102 || (String
.Compare (style_name
, "Footer") == 0)))
103 || (weight
!= null && weight
== "bold")
104 || (italic
!= null && italic
== "italic")
105 || (underline
!= null && underline
!= "none"))
// NOTE(review): style_name may still be null here when only the parent or
// a font attribute matched; hotStyles is a Hashtable (created in DoOpen),
// which rejects null keys -- confirm this case cannot occur.
106 hotStyles
[style_name
] = true;
// True when the element name denotes a hyperlink ("text:a").
static bool NodeIsLink (String nodeName)
{
	bool is_link = String.Equals (nodeName, "text:a");
	return is_link;
}
// True when the element name is itself hot, i.e. a heading ("text:h").
static bool NodeIsHot (String nodeName)
{
	bool is_heading = String.Equals (nodeName, "text:h");
	return is_heading;
}
119 // These container tags allow multiple lines of text, all of which
120 // should be marked *HOT*; hence the name "Container" ;-)
// True for elements whose entire (possibly multi-line) content is hot:
// annotations, footnotes, endnotes and OpenDocument notes.
static bool NodeIsHotContainer (String nodeName)
{
	switch (nodeName) {
	case "office:annotation":
	case "text:footnote":
	case "text:endnote":
	case "text:note": // "ODT format"
		return true;
	default:
		return false;
	}
}
// True for the citation elements of footnotes, endnotes and OpenDocument
// notes, which are the "freezing" nodes.
static bool NodeIsFreezing (String nodeName)
{
	if (nodeName == "text:footnote-citation")
		return true;

	if (nodeName == "text:endnote-citation")
		return true;

	// "ODT format"
	if (nodeName == "text:note-citation")
		return true;

	return false;
}
// True for elements that require a text break before their content:
// footnotes, endnotes, annotations and OpenDocument notes.
static bool NodeBreaksTextBefore (String nodeName)
{
	switch (nodeName) {
	case "text:footnote":
	case "text:endnote":
	case "office:annotation":
	case "text:note": // "ODT format"
		return true;
	default:
		return false;
	}
}
// True for elements that require a text break after them: explicit
// spaces, tab stops and table cells.
static bool NodeBreaksTextAfter (String nodeName)
{
	if (nodeName == "text:s")
		return true;

	if (nodeName == "text:tab-stop")
		return true;

	if (nodeName == "table:table-cell")
		return true;

	return false;
}
// True for elements that are followed by a structural break: paragraphs,
// headings, footnotes, endnotes, annotations and OpenDocument notes.
static bool NodeBreaksStructureAfter (String nodeName)
{
	switch (nodeName) {
	case "text:p":
	case "text:h":
	case "text:footnote":
	case "text:endnote":
	case "office:annotation":
	case "text:note": // "ODT format"
		return true;
	default:
		return false;
	}
}
// One boolean per open element: WalkContentNodes pushes the element's hot
// flag when the element starts and pops it again on the end element.
162 private Stack hot_nodes
= new Stack ();
// Text held back by AddTextForIndexing (the part of a chunk from its last
// space onwards) until it is joined with later text or flushed.
163 private string strPartText
= "";
// Hot flag associated with the text currently held in strPartText.
164 private bool bPartHotStyle
= false;
// Names of the hot container elements that are currently open; cleared
// when the end element matching the top entry is reached.
165 private Stack hot_container_nodes
= new Stack ();
// Passes a chunk of document text to AppendText, holding the part after
// the last space back in strPartText so that a word split across
// successive chunks can be rejoined before it is indexed.
// paramStr: the text chunk to add.
// NOTE(review): the declarations of sindex, strTemp and wasHot, and the
// branch structure between the statements below, are not visible in this
// excerpt.
167 void AddTextForIndexing (string paramStr
)
// Position of the last space; everything after it may be an unfinished word.
172 int index
= paramStr
.LastIndexOf (' ');
175 // During the previous parse, a word was cut off part-way through;
176 // find the remaining part of the word, concatenate it and add it to
177 // the respective pools and reset the HOT status, if required.
178 if (strPartText
.Length
> 0) {
// NOTE(review): IndexOf returns -1 when paramStr contains no space; the
// guard for that case is not visible here -- confirm one exists before
// the Substring calls that use sindex.
179 sindex
= paramStr
.IndexOf (' ');
180 strTemp
= strPartText
+ paramStr
.Substring (0, sindex
);
188 AppendText (strTemp
);
189 if (!wasHot
&& bPartHotStyle
)
191 bPartHotStyle
= false;
// Drop the part that was just emitted and recompute the last-space index.
193 paramStr
= paramStr
.Substring (sindex
);
194 index
= paramStr
.LastIndexOf (' ');
// Hold back the text from the last space onwards as the new partial text.
199 strPartText
= paramStr
.Substring (index
);
200 paramStr
= paramStr
.Substring (sindex
, index
);
// Append this chunk to the partial text already being held.
202 strTemp
= strPartText
+ paramStr
;
203 strPartText
= strTemp
;
208 // Enable *HOT* just before appending the text
209 // because there can be some *Partial Texts* without
210 // *HOT* styles that need to be appended.
211 if (hot_nodes
.Count
> 0 && (bool) hot_nodes
.Peek() == true) {
214 bPartHotStyle
= true;
// "|= false" leaves bPartHotStyle unchanged.
216 bPartHotStyle
|= false;
218 if (paramStr
.Length
> 0)
219 AppendText (paramStr
);
// Nothing but whitespace is being held back, so there is no hot state to keep.
221 if (strPartText
.Trim().Length
< 1)
222 bPartHotStyle
= false;
// Emits any text still held in strPartText through AppendText and resets
// the partial-text hot flag.
// NOTE(review): the statement guarded by "bPartHotStyle && !IsHot" and the
// statements after AppendText are not visible in this excerpt.
225 void FlushPartialText ()
227 if (strPartText
.Length
> 0) {
228 if (bPartHotStyle
&& !IsHot
)
230 AppendText (strPartText
);
235 bPartHotStyle
= false;
// Reads nodes from the given reader and dispatches on the node type:
//   - Element: emits breaks for empty elements, studies "style:style"
//     nodes, works out whether the element is hot and pushes that flag on
//     hot_nodes;
//   - Text: forwards the text to AddTextForIndexing;
//   - EndElement: emits structural/text breaks, pops hot_nodes and clears
//     hot_container_nodes when the matching container closes.
// reader: XML reader over styles.xml or content.xml (both are passed in by
// DoPull).
// NOTE(review): the return statements and several branch bodies are not
// visible in this excerpt; presumably the method returns once
// num_elements reaches total_elements so that work is spread over
// several pulls -- confirm.
239 bool WalkContentNodes (XmlReader reader
)
241 // total number of elements to read per-pull
242 const int total_elements
= 10;
243 int num_elements
= 0;
245 while (reader
.Read ()) {
246 switch (reader
.NodeType
) {
247 case XmlNodeType
.Element
:
// Empty elements have no matching EndElement node, so their breaks are
// handled here.
248 if (reader
.IsEmptyElement
) {
250 if (NodeBreaksStructureAfter (reader
.Name
))
251 AppendStructuralBreak ();
253 if (NodeBreaksTextBefore (reader
.Name
))
256 if (NodeBreaksTextAfter (reader
.Name
))
// Style definitions feed the hotStyles table.
262 if (reader
.Name
== "style:style") {
263 StudyStyleNode (reader
);
268 // (1) Its name is hot
269 // (2) It is flagged with a hot style
270 // (3) annotations are always hot.
274 if (NodeIsHot (reader
.Name
)) {
276 } else if (NodeIsHotContainer (reader
.Name
)) {
277 hot_container_nodes
.Push (reader
.Name
);
278 } else if (NodeIsLink (reader
.Name
)) {
279 string attr
= reader
.GetAttribute ("xlink:href");
// Scan the attributes for a "*:style-name" whose value is a known hot style.
284 bool has_attr
= reader
.MoveToFirstAttribute ();
286 if (reader
.Name
.EndsWith(":style-name")) {
287 if (hotStyles
.Contains (reader
.Value
))
291 has_attr
= reader
.MoveToNextAttribute ();
// Return from the attribute back to the owning element.
293 reader
.MoveToElement();
// Record this element's hot flag; it is popped on the EndElement node.
296 hot_nodes
.Push (isHot
);
299 // i) Its already in *HOT* mode and
300 // ii) there is a hot style/hot container tag
301 if (!IsHot
&& (isHot
|| hot_container_nodes
.Count
> 0))
304 if (NodeIsFreezing (reader
.Name
)) {
308 if (NodeBreaksTextBefore (reader
.Name
)) {
314 case XmlNodeType
.Text
:
315 string text
= reader
.Value
;
319 AddTextForIndexing (text
);
322 case XmlNodeType
.EndElement
:
323 if (NodeBreaksStructureAfter (reader
.Name
)) {
325 AppendStructuralBreak ();
327 else if (NodeBreaksTextAfter (reader
.Name
))
// Pop the hot flag pushed for the element that is closing; an empty
// stack is reported through the debug log below.
331 if (hot_nodes
.Count
> 0)
332 is_hot
= (bool) hot_nodes
.Pop ();
334 Logger
.Log
.Debug ("FilterOpenOffice: hot_nodes underflow in {0}",
// Leaving the hot container on top of the stack clears the whole stack.
336 if (hot_container_nodes
.Count
> 0) {
337 string hot_container_tag
= (string) hot_container_nodes
.Peek ();
338 if (hot_container_tag
== reader
.Name
) {
339 hot_container_nodes
.Clear ();
347 if (NodeIsFreezing (reader
.Name
)) {
// Per-pull element budget check.
353 if (num_elements
>= total_elements
) {
360 // The slide count is not stored in meta.xml; instead we need to
361 // parse the whole of content.xml to find out the number of
362 // slides present in an .sxi.
// Scans content.xml for the slide count of a presentation and, when one
// is found, stores it as the unsearched property "fixme:slide-count".
// reader: XML reader over content.xml (supplied by DoPullProperties).
// NOTE(review): the loop heads, the case labels of the switch and the
// reader advances are not visible in this excerpt.
363 private void ExtractSlideCount (XmlReader reader
)
365 string slideCount
= null;
370 // Do not parse the whole file if it is not a
371 // presentation (impress document)
372 if (reader
.Name
== "office:document-content"
373 && reader
.NodeType
== XmlNodeType
.Element
) {
374 string docClass
= reader
.GetAttribute ("office:class");
375 if (docClass
!= "presentation")
378 } while (reader
.Depth
< 2);
380 while (reader
.Depth
>= 1) {
// Only element nodes at depth 2 are examined by the switch below.
381 if (reader
.Depth
!= 2 || reader
.NodeType
!= XmlNodeType
.Element
) {
385 switch (reader
.Name
) {
// Each assignment overwrites the previous value, so the last "draw:id"
// read is the one reported.
387 slideCount
= reader
.GetAttribute ("draw:id");
392 if (slideCount
!= null)
393 AddProperty (Beagle
.Property
.NewUnsearched ("fixme:slide-count", slideCount
));
// Reads document metadata from meta.xml and publishes it as properties:
// dc:title, dc:description, dc:subject, fixme:page-count,
// fixme:word-count, fixme:spreadsheet-count, fixme:UserDefined-<name>
// and fixme:keywords.
// reader: XML reader over meta.xml (supplied by DoPullProperties).
// NOTE(review): several case labels, call arguments and the reader
// advances are not visible in this excerpt.
396 private void ExtractMetadata (XmlReader reader
)
400 } while (reader
.Depth
< 2);
402 while (reader
.Depth
>= 2) {
// Only element nodes at depth 2 are examined by the switch below.
403 if (reader
.Depth
!= 2 || reader
.NodeType
!= XmlNodeType
.Element
) {
407 switch (reader
.Name
) {
410 AddProperty (Beagle
.Property
.New ("dc:title",
414 case "dc:description":
417 AddProperty (Beagle
.Property
.New ("dc:description",
424 AddProperty (Beagle
.Property
.New ("dc:subject",
428 case "meta:document-statistic":
429 string attr
= reader
.GetAttribute ("meta:page-count");
431 AddProperty (Beagle
.Property
.NewUnsearched ("fixme:page-count", attr
));
432 attr
= reader
.GetAttribute ("meta:word-count");
434 AddProperty (Beagle
.Property
.NewUnsearched ("fixme:word-count", attr
));
436 // Both writer and calc use this attribute. Writer stores the
437 // count of tables in a sxw whereas calc stores the count of
438 // spreadsheets in a sxc.
439 attr
= reader
.GetAttribute ("meta:table-count");
// NOTE(review): only the OpenOffice.org 1.x calc MIME type is tested here,
// and Convert.ToInt32 throws on a non-numeric attribute value -- confirm
// both are intended.
440 if (attr
!= null && Convert
.ToInt32 (attr
) > 0
441 && MimeType
== "application/vnd.sun.xml.calc")
442 AddProperty (Beagle
.Property
.NewUnsearched ("fixme:spreadsheet-count", attr
));
445 case "meta:user-defined":
446 string name
= reader
.GetAttribute ("meta:name");
// Empty user-defined values are not published.
449 if (reader
.Value
!= "") {
450 AddProperty (Beagle
.Property
.New ("fixme:UserDefined-" + name
,
457 AddProperty (Beagle
.Property
.New ("fixme:keywords",
// Prepares the filter for a new document: creates an empty hotStyles
// table and opens the file as a zip archive. Any exception is caught and
// logged as a probably-invalid OpenOffice document.
// info: the file to open.
// NOTE(review): the try statement and the body of the OpenDocument
// MIME-type check are not visible in this excerpt.
468 override protected void DoOpen (FileInfo info
)
470 hotStyles
= new Hashtable ();
472 zip
= new ZipFile (info
.FullName
);
474 if (MimeType
.StartsWith ("application/vnd.oasis.opendocument."))
477 } catch (Exception e
) {
478 Logger
.Log
.Error ("Unable to open {0}. Probably an invalid OpenOffice document.",
// Extracts document properties from the archive: metadata from meta.xml
// (via ExtractMetadata) and the slide count from content.xml (via
// ExtractSlideCount). An error is logged for each entry that is missing.
// NOTE(review): the null checks that select between the extraction and
// the error branches are not visible in this excerpt.
484 override protected void DoPullProperties ()
487 ZipEntry entry
= zip
.GetEntry ("meta.xml");
489 Stream meta_stream
= zip
.GetInputStream (entry
);
490 XmlReader reader
= new XmlTextReader (meta_stream
);
491 ExtractMetadata (reader
);
493 Logger
.Log
.Error ("No meta.xml!");
// The same entry variable is reused for content.xml.
495 entry
= zip
.GetEntry ("content.xml");
497 Stream contents_stream
= zip
.GetInputStream (entry
);
498 XmlReader reader
= new XmlTextReader (contents_stream
);
499 ExtractSlideCount (reader
);
501 Logger
.Log
.Error ("No content.xml!");
// Reader over content.xml; created by DoPull while both readers are still
// null.
506 XmlReader content_reader
= null;
// Reader over styles.xml; created together with content_reader in DoPull.
507 XmlReader style_reader
= null;
508 override protected void DoPull ()
511 // We need both styles.xml and content.xml as
512 // "Header", "Footer" are stored in styles.xml and
513 // "[Foot/End]Notes" are stored in content.xml.
514 if ((content_reader
== null) && (style_reader
== null)) {
516 ZipEntry entry
= zip
.GetEntry ("content.xml");
517 ZipEntry entry1
= zip
.GetEntry ("styles.xml");
519 if ((entry
!= null) && (entry1
!= null)) {
520 Stream content_stream
= zip
.GetInputStream (entry
);
521 Stream style_stream
= zip
.GetInputStream (entry1
);
522 content_reader
= new XmlTextReader (content_stream
);
523 style_reader
= new XmlTextReader (style_stream
);
527 if ((content_reader
== null) && (style_reader
== null)) {
532 // Note: Do not change the order.
533 // We need to populate our hotStyles table with all possible hot styles.
534 // Since "footnotes" and "endnotes" get stored in content.xml and these
535 // styles need to be marked as *HOT*, they need to be processed before contents.
536 if ((WalkContentNodes (style_reader
)) && (WalkContentNodes (content_reader
)))