Compute lucene-style scores for our hits.
[beagle.git] / Filters / FilterChm.cs
blob3241ad93cc43e25f0a6839c801a9a3dcd3a6f47f
1 //
2 // FilterChm.cs : Trivial implementation of a CHM filter.
3 //
4 // Author :
5 // Miguel Cabrera <mfcabrer@unalmed.edu.co>
6 //
7 // Copyright (C) 2005 Miguel Cabrera
8 //
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
17 // The above copyright notice and this permission notice shall be included in
18 // all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 // DEALINGS IN THE SOFTWARE.
29 using System;
30 using System.Collections;
31 using System.IO;
32 using System.Text;
33 using HtmlAgilityPack;
35 using Beagle.Util;
36 using Beagle.Daemon;
namespace Beagle.Filters {

	/// <summary>
	/// Trivial filter for Compiled HTML Help (CHM) archives. It indexes the
	/// archive title, the text of the default page and the entry names found
	/// in the table-of-contents (topics) file.
	/// </summary>
	public class FilterChm : FilterHtml {

		// Archive being filtered; created in DoOpen and released in DoClose.
		ChmFile chmFile;

		/// <summary>
		/// Registers the supported mime type and turns snippet mode on.
		/// </summary>
		public FilterChm ()
		{
			RegisterSupportedTypes ();
			SnippetMode = true;
		}

		/// <summary>
		/// Case-insensitive string equality that does not depend on the
		/// current culture (e.g. "I" and "i" still match under Turkish
		/// casing rules).
		/// </summary>
		static bool EqualsIgnoreCase (string a, string b)
		{
			return String.Compare (a, b, true, System.Globalization.CultureInfo.InvariantCulture) == 0;
		}

		/// <summary>
		/// Walks the children of a head node: the text of a title element
		/// is appended to the indexed text, and every meta element with a
		/// non-empty name and content becomes a property.
		/// </summary>
		/// <remarks>
		/// NOTE(review): declared with "new", so this hides a member of the
		/// base class instead of overriding it; presumably base-class code
		/// keeps calling its own version -- confirm against FilterHtml.
		/// </remarks>
		/// <param name="node">Node whose direct children are inspected.</param>
		new protected void WalkHeadNodes (HtmlNode node)
		{
			foreach (HtmlNode subnode in node.ChildNodes) {
				if (subnode.NodeType != HtmlNodeType.Element)
					continue;

				if (subnode.Name == "title") {
					String title = WalkChildNodesForText (subnode);
					title = HtmlEntity.DeEntitize (title);
					// The page title is indexed as plain text here; the
					// dc:title property is set from the archive title in
					// DoPullProperties.
					AppendText (title);
				}

				if (subnode.Name == "meta") {
					string name = subnode.GetAttributeValue ("name", "");
					string content = subnode.GetAttributeValue ("content", "");
					if (name != "" && content != "")
						AddProperty (Beagle.Property.New (name, content));
				}
			}
		}

		/// <summary>
		/// Entry point for the table-of-contents document: recurses through
		/// html and head elements and hands every other element to WalkToc.
		/// </summary>
		/// <param name="node">Node whose direct children are inspected.</param>
		public void WalkTocFile (HtmlNode node)
		{
			foreach (HtmlNode subnode in node.ChildNodes) {
				if (subnode.NodeType != HtmlNodeType.Element)
					continue;

				switch (subnode.Name) {
				case "html":
				case "head":
					WalkTocFile (subnode);
					break;
				case "body":
				default:
					// Any other element is searched for entries.
					WalkToc (subnode);
					break;
				}
			}
		}

		/// <summary>
		/// Recursively walks a table-of-contents subtree. The children of
		/// every li element are offered to HandleTocEntry, and all children
		/// are then walked in turn.
		/// </summary>
		/// <param name="node">Root of the subtree to walk.</param>
		public void WalkToc (HtmlNode node)
		{
			switch (node.NodeType) {
			case HtmlNodeType.Document:
			case HtmlNodeType.Element:
				if (node.Name == "li") {
					foreach (HtmlNode subnode in node.ChildNodes)
						HandleTocEntry (subnode);
				}

				foreach (HtmlNode subnode in node.ChildNodes)
					WalkToc (subnode);
				break;
			}
		}

		/// <summary>
		/// Indexes one table-of-contents entry. For an object element of
		/// type "text/sitemap", the value of each param child named "Name"
		/// is appended as hot text.
		/// </summary>
		/// <param name="node">Candidate entry node; other nodes are ignored.</param>
		public void HandleTocEntry (HtmlNode node)
		{
			if (node.Name != "object")
				return;

			string attr = node.GetAttributeValue ("type", "");
			if (! EqualsIgnoreCase (attr, "text/sitemap"))
				return;

			foreach (HtmlNode subnode in node.ChildNodes) {
				if (! EqualsIgnoreCase (subnode.Name, "param"))
					continue;

				// Compared case-insensitively, like the element name and
				// the object type above.
				if (! EqualsIgnoreCase (subnode.GetAttributeValue ("name", ""), "Name"))
					continue;

				HotUp ();
				AppendText (subnode.GetAttributeValue ("value", ""));
				HotDown ();
			}
		}

		/// <summary>
		/// Parses HTML from the reader and walks the resulting document
		/// with WalkNodes.
		/// </summary>
		/// <param name="reader">Source of the HTML text.</param>
		void ReadHtml (TextReader reader)
		{
			HtmlDocument doc = new HtmlDocument ();

			try {
				doc.Load (reader);
			} catch (ArgumentNullException e) {
				// Not expected to happen; log it and skip this document
				// instead of failing the whole filter.
				Logger.Log.Warn (e.Message);
				return;
			}

			WalkNodes (doc.DocumentNode);
		}

		/// <summary>
		/// Loads the CHM archive. On any failure a warning is logged and
		/// the filter is marked as finished.
		/// </summary>
		/// <param name="info">File to open.</param>
		override protected void DoOpen (FileInfo info)
		{
			chmFile = new ChmFile ();

			try {
				chmFile.Load (info.FullName);
			} catch (Exception e) {
				Logger.Log.Warn ("Could not parse {0}: {1}",info.Name,e.Message);
				Finished ();
				return;
			}
		}

		/// <summary>
		/// Publishes the archive title as dc:title when there is one.
		/// </summary>
		override protected void DoPullProperties ()
		{
			string title = chmFile.Title;

			// A null title must be skipped as well as an empty one.
			if (title != null && title.Length > 0)
				AddProperty (Beagle.Property.New ("dc:title", title));
		}

		/// <summary>
		/// Extracts the text in a single pass and then marks the filter as
		/// finished.
		/// </summary>
		override protected void DoPull ()
		{
			// We only read the default file and the topics file, not every
			// page contained in the archive.
			ReadHtml (chmFile.GetDefaultFile ());

			HtmlDocument doc = new HtmlDocument ();
			bool tocLoaded = false;

			try {
				doc.Load (chmFile.GetTopicsFile ());
				tocLoaded = true;
			} catch (ArgumentNullException e) {
				// Same handling as in ReadHtml: presumably the archive has
				// no topics file -- log it and carry on so that Finished ()
				// is still reached.
				Logger.Log.Warn (e.Message);
			}

			if (tocLoaded)
				WalkTocFile (doc.DocumentNode);

			Finished ();
		}

		/// <summary>
		/// Releases the archive, if one was created.
		/// </summary>
		override protected void DoClose ()
		{
			if (chmFile == null)
				return;

			chmFile.Dispose ();
			chmFile = null;
		}

		/// <summary>
		/// Declares the mime type handled by this filter.
		/// </summary>
		override protected void RegisterSupportedTypes ()
		{
			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-chm"));
		}
	}
}