2 // FilterChm.cs : Trivial implementation of a CHM filter.
5 // Miguel Cabrera <mfcabrer@unalmed.edu.co>
7 // Copyright (C) 2005 Miguel Cabrera
10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
17 // The above copyright notice and this permission notice shall be included in
18 // all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 // DEALINGS IN THE SOFTWARE.
30 using System
.Collections
;
33 using HtmlAgilityPack
;
38 namespace Beagle
.Filters
{
// Filter for CHM files. Derives from FilterHtml.
40 public class FilterChm
: FilterHtml
{
// NOTE(review): the member enclosing the call below (presumably the
// constructor) is not visible in this view -- confirm against the full file.
// Invokes RegisterSupportedTypes(), which declares the MIME type this filter supports.
46 RegisterSupportedTypes();
// Scans the direct children of `node` for <title> and <meta> elements.
//   node: the HTML node whose ChildNodes are inspected (not recursed into here).
// Declared with the `new` modifier, so it hides (does not override) any
// inherited member named WalkHeadNodes.
52 new protected void WalkHeadNodes (HtmlNode node
)
54 foreach (HtmlNode subnode
in node
.ChildNodes
) {
// <title> element: collect its text and decode HTML entities.
55 if (subnode
.NodeType
== HtmlNodeType
.Element
56 && subnode
.Name
== "title") {
57 String title
= WalkChildNodesForText (subnode
);
58 title
= HtmlEntity
.DeEntitize (title
);
// The decoded title is not used in the visible code: the only consumer is
// the commented-out AddProperty call below.
59 //AddProperty (Beagle.Property.New ("dc:title", title));
// <meta> element: publish it as a property keyed by its "name" attribute.
62 if (subnode
.NodeType
== HtmlNodeType
.Element
63 && subnode
.Name
== "meta") {
// Both attributes default to "" when absent.
64 string name
= subnode
.GetAttributeValue ("name", "");
65 string content
= subnode
.GetAttributeValue ("content", "");
// Skip meta tags where either the name or the content is empty.
66 if (name
!= "" && content
!= "")
67 AddProperty (Beagle
.Property
.New (name
, content
));
// Walks the children of `node` and dispatches on the tag name of each
// element child.
//   node: the HTML node whose ChildNodes are examined.
// NOTE(review): the case labels of the switch are not visible in this view;
// only one branch body (the recursive call) can be seen -- confirm the rest.
73 public void WalkTocFile(HtmlNode node
)
78 foreach (HtmlNode subnode
in node
.ChildNodes
) {
// Only element nodes are considered.
79 if (subnode
.NodeType
== HtmlNodeType
.Element
) {
80 switch (subnode
.Name
) {
// For at least one tag name the walk recurses into the child.
83 WalkTocFile (subnode
);
// Processes a node of the table of contents, dispatching on its node type.
//   node: the HTML node to process.
97 public void WalkToc(HtmlNode node
)
100 switch (node
.NodeType
) {
// Document and Element nodes share the same handling.
102 case HtmlNodeType
.Document
:
103 case HtmlNodeType
.Element
:
// Every child of an <li> element is passed to HandleTocEntry.
105 if(node
.Name
== "li")
106 foreach(HtmlNode subnode
in node
.ChildNodes
)
107 HandleTocEntry(subnode
);
// NOTE(review): the body of this second loop over the children is not
// visible in this view (presumably a recursive WalkToc call) -- confirm.
109 foreach(HtmlNode subnode
in node
.ChildNodes
)
// Handles one candidate table-of-contents entry.
//   node: the HTML node to inspect; nodes not named "object" are ignored
//         by the visible code.
// NOTE(review): the tail of this method is not visible in this view.
120 public void HandleTocEntry(HtmlNode node
)
123 if(node
.Name
== "object") {
// "type" attribute, "" when absent.
125 string attr
= node
.GetAttributeValue ("type", "");
// Case-insensitive match on the type (ignoreCase argument is true).
127 if(String
.Compare(attr
,"text/sitemap",true) == 0)
128 foreach(HtmlNode subnode
in node
.ChildNodes
)
// The tag name "param" is matched case-insensitively, whereas the "name"
// attribute must equal "Name" exactly (plain == comparison).
129 if(String
.Compare(subnode
.Name
,"param",true) == 0 &&
130 subnode
.GetAttributeValue("name","") == "Name" ){
// Pass the entry's "value" attribute ("" when absent) to AppendText.
132 AppendText(subnode
.GetAttributeValue("value",""));
// Builds an HtmlDocument and walks its nodes.
//   reader: text reader for the HTML content.
// NOTE(review): the try body is not visible in this view; presumably it loads
// `doc` from `reader` -- confirm against the full file.
144 void ReadHtml(TextReader reader
)
147 HtmlDocument doc
= new HtmlDocument ();
151 } catch (ArgumentNullException e
) {
152 /* Unexpected: this should not happen. */
153 // TODO: decide what should be done here.
// For now the exception message is logged as a warning.
154 Logger
.Log
.Warn (e
.Message
);
// Walk the document starting from its root node.
160 WalkNodes (doc
.DocumentNode
);
// Opens the CHM file described by `info`.
//   info: file to open; FullName is passed to the loader and Name is used
//         in the warning message.
// NOTE(review): the try header and anything after the warning inside the
// catch block are not visible in this view.
166 override protected void DoOpen (FileInfo info
)
// Create a new ChmFile and store it in chmFile.
169 chmFile
= new ChmFile();
173 chmFile
.Load(info
.FullName
);
// Any exception is caught here and reported as a warning.
176 catch (Exception e
) {
178 Logger
.Log
.Warn ("Could not parse {0}: {1}",info
.Name
,e
.Message
);
// Adds the CHM title as the "dc:title" property.
191 override protected void DoPullProperties()
// An empty title is skipped.
194 if(chmFile
.Title
!= "")
195 AddProperty (Beagle
.Property
.New ("dc:title", chmFile
.Title
));
// Reads the archive's default file via ReadHtml, then loads its topics file
// and walks it with WalkTocFile.
201 override protected void DoPull()
203 //Logger.Log.Debug("FilterCHM: Parsing:" + chmFile.Title);
204 //chmFile.ParseContents(ReadHtml);
// NOTE(review): the next line looks like the text of a block comment whose
// delimiters are not visible in this view -- confirm against the full file.
208 We only read the default file and the topic file
// Read the default file.
210 ReadHtml(chmFile
.GetDefaultFile());
// Load the topics file into a separate HtmlDocument.
212 HtmlDocument doc
= new HtmlDocument();
214 doc
.Load(chmFile
.GetTopicsFile());
// Walk the loaded document from its root node.
216 WalkTocFile(doc
.DocumentNode
);
// Override of DoClose.
// NOTE(review): the method body is not visible in this view -- confirm what it does.
223 override protected void DoClose()
// Declares the MIME type handled by this filter: "application/x-chm".
229 override protected void RegisterSupportedTypes()
231 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/x-chm"));