Compute lucene-style scores for our hits.
[beagle.git] / Filters / FilterDocbook.cs
blobdc2b8b2b87f81667b0ae97fdbbeafe2c0fcafb5d
1 //
2 // FilterDocbook.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Xml;
30 using System.Text;
31 using System.Collections;
33 using Beagle.Util;
34 using Beagle.Daemon;
36 namespace Beagle.Filters
38 public class FilterDocbook : Filter
// XML reader over the document being filtered; created in DoOpen.
protected XmlTextReader reader;

// Full path of the file being filtered. Child indexable URIs are
// built as "<base_path>#<entry id>".
protected string base_path;

// Text of the first <title> seen while no section entry is open
// (probably the book title); null until one is found.
protected string base_title;

// Section/chapter entries that are currently open, innermost on top.
protected Stack entries_stack = new Stack ();
// Book-keeping for one section or chapter element that carries an "id"
// attribute, kept while the parser is inside that element.
protected class DocbookEntry {
	// Value of the element's "id" attribute (null until assigned).
	public string Id;

	// Text of the first <title> found inside the element (null until found).
	public string Title;

	// Reader depth of the element's start tag; -1 until assigned.
	public int Depth = -1;

	// Text collected while this entry is on top of the stack.
	public StringBuilder Content = new StringBuilder ();
}
54 //////////////////////////////////////////////////
// Sets SnippetMode to false, sets the filter version to 3 and registers
// the flavors this filter supports: the DocBook MIME type, the
// ".docbook" extension, and ".xml" files matching the system
// documentation path pattern.
public FilterDocbook ()
{
	SnippetMode = false;
	SetVersion (3);

	// Files explicitly identified as DocBook.
	AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/docbook+xml"));
	AddSupportedFlavor (FilterFlavor.NewFromExtension (".docbook"));

	// FIXME: Uri/Extension mapping?
	// NOTE(review): arguments look like (uri pattern, extension,
	// mime type, priority) -- confirm against FilterFlavor.
	FilterFlavor doc_dir_flavor = new FilterFlavor ("file:///usr/share/doc/*", ".xml", null, 0);
	AddSupportedFlavor (doc_dir_flavor);
}
68 ///////////////////////////////////////////////////
// Remembers the full path of the file and wraps the filter's Stream in
// an XmlTextReader for DoPullProperties to consume.
//
// info: the file being filtered; its FullName becomes base_path.
override protected void DoOpen (FileInfo info)
{
	base_path = info.FullName;

	XmlTextReader xml_reader = new XmlTextReader (Stream);

	// With a null resolver the reader does not resolve external
	// resources such as DTDs referenced by the document.
	xml_reader.XmlResolver = null;

	reader = xml_reader;
}
// Reads the whole document from the XML reader.
//
// Every text node is appended to the main indexable. In addition, each
// element whose name starts with "sect" or "chapter" and that carries a
// non-empty "id" attribute becomes a child indexable holding the text
// that was read while it was the innermost open entry.
//
// Calls Error () if no child indexable was produced, Finished () otherwise.
override protected void DoPullProperties ()
{
	Stopwatch watch = new Stopwatch ();

	watch.Start ();

	while (reader.Read ()) {
		switch (reader.NodeType) {
		case XmlNodeType.Element:
			// Self-closing elements (<sect1 id="x"/>, <title/>) have no
			// content and never produce an EndElement node. Pushing an
			// entry for one would leave a stale entry on the stack that
			// collects the text of following siblings and is closed by
			// an unrelated end tag; reading "into" an empty title would
			// swallow whatever node comes next. Skip them.
			if (reader.IsEmptyElement)
				break;

			if (reader.Name.StartsWith ("sect") || reader.Name.StartsWith ("chapter")) {
				string id = reader.GetAttribute ("id");

				// Only sections that can be addressed by id get an entry.
				if (id != null && id != "") {
					DocbookEntry entry = new DocbookEntry ();
					entry.Id = id;
					entry.Depth = reader.Depth;
					entries_stack.Push (entry);
				}
			} else if (reader.Name == "title") {
				reader.Read (); // Go to the text node

				if (entries_stack.Count == 0 && base_title == null) {
					// This is probably the book title
					base_title = reader.Value;
				} else if (entries_stack.Count > 0) {
					DocbookEntry entry = (DocbookEntry) entries_stack.Peek ();

					// Keep the first title only; later ones belong
					// to nested elements.
					if (entry.Title == null)
						entry.Title = reader.Value;
				}
			}
			break;

		case XmlNodeType.Text:
			// Append text to the child indexable
			if (entries_stack.Count > 0)
				((DocbookEntry) entries_stack.Peek ()).Content.Append (reader.Value);

			// Append text to the main indexable
			AppendText (reader.Value);
			break;

		case XmlNodeType.EndElement:
			// An end tag at the depth of the innermost open entry
			// closes that entry.
			if (entries_stack.Count > 0 && ((DocbookEntry) entries_stack.Peek ()).Depth == reader.Depth) {
				DocbookEntry entry, parent_entry = null;

				entry = (DocbookEntry) entries_stack.Pop ();

				if (entries_stack.Count > 0)
					parent_entry = (DocbookEntry) entries_stack.Peek ();

				AddChildIndexable (CreateEntryIndexable (entry, parent_entry));
			}
			break;
		}
	}

	watch.Stop ();

	// If we've successfully crawled the file but haven't
	// found any indexables, we shouldn't consider it
	// successful at all.
	if (ChildIndexables.Count == 0) {
		Error ();
		return;
	}

	Logger.Log.Debug ("Parsed docbook file in {0}", watch);

	Finished ();
}

// Builds the child indexable for a closed entry.
//
// entry: the entry that was just closed.
// parent_entry: the entry enclosing it, or null if there is none.
private Indexable CreateEntryIndexable (DocbookEntry entry, DocbookEntry parent_entry)
{
	Indexable indexable = new Indexable (UriFu.PathToFileUri (String.Format ("{0}#{1}", base_path, entry.Id)));
	indexable.Type = "DocbookEntry";
	indexable.MimeType = "text/plain";
	indexable.AddProperty (Property.NewUnsearched ("fixme:id", entry.Id));

	if (entry.Title != null)
		indexable.AddProperty (Property.New ("dc:title", entry.Title));

	// Add the docbook book title
	if (base_title != null)
		indexable.AddProperty (Property.NewUnsearched ("fixme:base_title", base_title));

	// Add any parent (as in docbook parent entry, not beagle) data if we have it
	if (parent_entry != null) {
		indexable.AddProperty (Property.NewUnsearched ("fixme:parent_id", parent_entry.Id));

		if (parent_entry.Title != null)
			indexable.AddProperty (Property.NewUnsearched ("fixme:parent_title", parent_entry.Title));
	}

	StringReader content_reader = new StringReader (entry.Content.ToString ());
	indexable.SetTextReader (content_reader);

	return indexable;
}