Fix a bunch of memory problems in beagle:
[beagle.git] / Filters / FilterDocbook.cs
blob59c81977237e512f533d601aa4395f0e234bb6fc
1 //
2 // FilterDocbook.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Xml;
30 using System.Text;
31 using System.Collections;
33 using Beagle.Util;
34 using Beagle.Daemon;
namespace Beagle.Filters
{
	/// <summary>
	/// Filter for DocBook XML documents.
	///
	/// All text nodes of the document are appended to the main indexable.
	/// In addition, every element whose name starts with "sect" or "chapter"
	/// and that carries a non-empty "id" attribute is emitted as a child
	/// indexable whose URI is the file path followed by "#" and that id.
	/// </summary>
	public class FilterDocbook : Filter
	{
		// Reader over the filter's input stream; created in DoOpen.
		protected XmlTextReader reader;

		// Full path of the file being filtered; used to build child URIs.
		protected string base_path;

		// Text of the first <title> seen outside any tracked section.
		protected string base_title;

		// "lang" attribute of the <article>/<book> element, if any.
		protected string base_language;

		// Sections that have been opened but not yet closed, innermost on top.
		protected Stack entries_stack = new Stack ();

		/// <summary>
		/// State collected for one section/chapter between its start tag
		/// and its end tag.
		/// </summary>
		protected class DocbookEntry {
			public string Id = null;
			public string Title = null;
			public string Language = null;

			// XmlReader depth of the start tag; the matching end tag is
			// recognised by having the same depth.
			public int Depth = -1;

			public StringBuilder Content = new StringBuilder ();
		}

		//////////////////////////////////////////////////

		/// <summary>
		/// Registers the flavors (mime type, extension, path pattern) this
		/// filter is offered for.
		/// </summary>
		public FilterDocbook ()
		{
			SnippetMode = false;
			SetVersion (4);

			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/docbook+xml"));
			AddSupportedFlavor (FilterFlavor.NewFromExtension (".docbook"));

			// FIXME: Uri/Extension mapping?
			AddSupportedFlavor (new FilterFlavor ("file:///usr/share/doc/*", ".xml", null, 0));
		}

		///////////////////////////////////////////////////

		/// <summary>
		/// Remembers the file's path and creates the XML reader over the
		/// filter's Stream.
		/// </summary>
		/// <param name="info">The file being filtered.</param>
		override protected void DoOpen (FileInfo info)
		{
			base_path = info.FullName;
			reader = new XmlTextReader (Stream);

			// A null resolver means external resources such as DTDs are
			// not resolved while reading.
			reader.XmlResolver = null;
		}

		/// <summary>
		/// Walks the whole document once, feeding text to the main
		/// indexable, emitting one child indexable per closed section, and
		/// finally attaching the document title and language to the main
		/// indexable.  Calls Error () instead of Finished () when neither a
		/// child indexable nor a document title was found.
		/// </summary>
		override protected void DoPullProperties ()
		{
			Stopwatch watch = new Stopwatch ();

			watch.Start ();

			while (reader.Read ()) {
				switch (reader.NodeType) {
				case XmlNodeType.Element:
					HandleStartElement ();
					break;

				case XmlNodeType.Text:
					// Append text to the child indexable
					if (entries_stack.Count > 0)
						((DocbookEntry) entries_stack.Peek ()).Content.Append (reader.Value);

					// Append text to the main indexable
					AppendText (reader.Value);
					break;

				case XmlNodeType.EndElement:
					// The innermost open section ends when an end tag
					// at the depth of its start tag is reached.
					if (entries_stack.Count > 0 && ((DocbookEntry) entries_stack.Peek ()).Depth == reader.Depth)
						CloseCurrentEntry ();
					break;
				}
			}

			// Add the common properties to the top-level
			// file item such as Title, Language etc.

			if (base_title != null)
				AddProperty (Property.New ("dc:title", base_title));

			if (base_language != null)
				AddProperty (Property.NewUnsearched ("fixme:language", base_language));

			watch.Stop ();

			// If we've successfully crawled the file but haven't
			// found any indexables, we shouldn't consider it
			// successful at all (unless we have a title, which
			// means that it's actually a docbook file, just without
			// sections).
			if (ChildIndexables.Count == 0 && base_title == null) {
				Error ();
				return;
			}

			Logger.Log.Debug ("Parsed docbook file in {0}", watch);

			Finished ();
		}

		// Handles the start tag the reader is currently positioned on:
		// opens a new section entry, records the document language, or
		// captures a title.
		private void HandleStartElement ()
		{
			string name = reader.Name;

			if (name.StartsWith ("sect") || name.StartsWith ("chapter")) {
				// An empty element (<sect1 id="x"/>) never produces an
				// EndElement node, so an entry pushed for it could not
				// be closed by its own end tag: it would swallow the
				// text that follows and be popped by an unrelated end
				// tag.  It has no content to index, so skip it.
				if (reader.IsEmptyElement)
					return;

				string id = reader.GetAttribute ("id");

				if (id == null || id == "")
					return;

				DocbookEntry entry = new DocbookEntry ();
				entry.Id = id;
				entry.Depth = reader.Depth;

				string language = reader.GetAttribute ("lang");

				if (language != null && language != "")
					entry.Language = language;

				entries_stack.Push (entry);
			} else if (name == "article" || name == "book") {
				string language = reader.GetAttribute ("lang");

				if (language != null && language != "")
					base_language = language;
			} else if (name == "title") {
				// <title/> has no text node; reading ahead would
				// consume whatever node follows it (possibly a section
				// start or end tag) and lose it for the main loop.
				if (reader.IsEmptyElement)
					return;

				reader.Read (); // Go to the text node

				if (entries_stack.Count == 0 && base_title == null) {
					// This is probably the book title
					base_title = reader.Value;
				} else if (entries_stack.Count > 0) {
					DocbookEntry entry = (DocbookEntry) entries_stack.Peek ();

					// Only the first title inside a section counts
					if (entry.Title == null)
						entry.Title = reader.Value;
				}
			}
		}

		// Pops the innermost open section and emits it as a child
		// indexable.  The caller must have checked that the stack is
		// not empty.
		private void CloseCurrentEntry ()
		{
			DocbookEntry entry, parent_entry = null;

			entry = (DocbookEntry) entries_stack.Pop ();

			if (entries_stack.Count > 0)
				parent_entry = (DocbookEntry) entries_stack.Peek ();

			Indexable indexable = new Indexable (UriFu.PathToFileUri (String.Format ("{0}#{1}", base_path, entry.Id)));
			indexable.HitType = "DocbookEntry";
			indexable.MimeType = "text/x-docbook-entry";
			indexable.Filtering = IndexableFiltering.AlreadyFiltered;

			indexable.AddProperty (Property.NewUnsearched ("fixme:id", entry.Id));

			if (entry.Title != null)
				indexable.AddProperty (Property.New ("dc:title", entry.Title));

			// Add the docbook book title
			if (base_title != null)
				indexable.AddProperty (Property.NewUnsearched ("fixme:base_title", base_title));

			// Add the child language (or docbook language if none is specified)
			if (entry.Language != null)
				indexable.AddProperty (Property.NewUnsearched ("fixme:language", entry.Language));
			else if (base_language != null)
				indexable.AddProperty (Property.NewUnsearched ("fixme:language", base_language));

			// Add any parent (as in docbook parent entry, not beagle) data if we have it
			if (parent_entry != null) {
				indexable.AddProperty (Property.NewUnsearched ("fixme:parent_id", parent_entry.Id));

				if (parent_entry.Title != null)
					indexable.AddProperty (Property.NewUnsearched ("fixme:parent_title", parent_entry.Title));
			}

			StringReader content_reader = new StringReader (entry.Content.ToString ());
			indexable.SetTextReader (content_reader);

			AddChildIndexable (indexable);
		}
	}
}