(Back)port some changes from beagle-lucene-1-9-lockfile-branch: allow ext: queries...
[beagle.git] / Filters / FilterSvg.cs
blob81cee83ed695d76ce5d3683475ecfd6497b3c97f
1 //
2 // FilterSVG.cs
3 //
4 // Copyright (C) 2006 Alexander Macdonald <alex@alexmac.cc>
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Xml;
30 using System.Text;
32 using Beagle.Util;
33 using Beagle.Daemon;
namespace Beagle.Filters {

	// Filter for SVG documents.
	//
	// The document is walked with a forward-only XmlTextReader.  Text found in
	// <title> and <desc> elements is stored as the dc:title and dc:description
	// properties, an embedded <RDF> block is handed to PullRdfProperties, and
	// all remaining text and comments are appended to the indexed content.
	public class FilterSvg : Beagle.Daemon.Filter {

		// Accumulates the text of the element currently being grabbed.  It is
		// shared by DoPullProperties and PullRdfProperties and is emptied
		// (Length = 0) every time a property has been emitted from it.
		private StringBuilder sb = new StringBuilder ();

		// Indices into rdf_grab_strings; Num is the number of grab modes.
		private enum RdfGrabModes {
			TitleMode,
			DescriptionMode,
			DateMode,
			Num
		}

		// RDF elements whose own text becomes the property "dc:<name>".
		static private string [] rdf_grab_strings = { "title", "description", "date" };

		// RDF elements whose value is taken from a nested <Agent> element
		// instead of from their own text.
		static private string [] rdf_nongrab_strings = { "creator", "contributor", "publisher", "rights" };

		// List of keys that should be ignored when adding to content.
		// For example, dc:format is the mime type, so it's not interesting text.
		static private string [] ignore_strings = { "format" };

		// Registers the flavors handled by this filter: the image/svg+xml mime
		// type and the .svg file extension.
		public FilterSvg ()
		{
			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("image/svg+xml"));
			AddSupportedFlavor (FilterFlavor.NewFromExtension (".svg"));
		}

		// Reads the whole SVG stream, emitting properties and content text.
		//
		// Calls Finished () once the document has been read to the end.  If the
		// XML is malformed the failure is logged and Error () is called instead.
		override protected void DoPullProperties ()
		{
			XmlTextReader reader = new XmlTextReader (Stream);
			// No resolver: external DTDs/entities referenced by the file are
			// not fetched while parsing.
			reader.XmlResolver = null;

			// Depth of the <title>/<desc> element currently being grabbed.
			int depth = 0;
			bool grab_text = false, ignore_text = false;
			string text = "";

			try {
				while (reader.Read ()) {
					switch (reader.NodeType) {
					case XmlNodeType.Element:
						// Elements nested inside a grabbed <title>/<desc> do
						// not change the current mode.
						if (grab_text)
							break;

						if (ArrayFu.IndexOfString (ignore_strings, reader.LocalName) != -1)
							ignore_text = true;
						else if (reader.LocalName == "title" || reader.LocalName == "desc") {
							grab_text = true;
							depth = reader.Depth;
						} else if (reader.LocalName == "RDF")
							PullRdfProperties (reader, reader.Depth);

						break;

					case XmlNodeType.Text:
						text = reader.Value.Trim ();
						if (text.Length == 0 || ignore_text)
							break;

						if (grab_text) {
							sb.Append (text);
						} else {
							AppendText (text);
							AppendStructuralBreak ();
						}
						break;

					case XmlNodeType.Comment:
						AppendText (reader.Value.Trim ());
						AppendStructuralBreak ();
						break;

					case XmlNodeType.EndElement:
						// Stop ignoring text as soon as any element closes, as
						// PullRdfProperties does.  Previously the flag was only
						// cleared when a <title>/<desc> closed, so one ignored
						// element could silence the rest of the document.
						ignore_text = false;

						// Only the end tag matching the grabbed element (same
						// depth) terminates the grab.
						if (! (grab_text && depth == reader.Depth))
							break;

						grab_text = false;

						if (reader.LocalName == "title") {
							AddProperty (Property.New ("dc:title", sb.ToString ()));
							sb.Length = 0;
						} else if (reader.LocalName == "desc") {
							AddProperty (Property.New ("dc:description", sb.ToString ()));
							sb.Length = 0;
						}
						break;
					}
				}

				Finished ();
			} catch (System.Xml.XmlException e) {
				Logger.Log.Error ("error parsing xml file {0}", FileInfo.FullName);
				Logger.Log.Debug (e);
				Error ();
			}
		}

		// Consumes an embedded RDF block and turns its metadata into properties.
		//
		// reader: positioned on the <RDF> start element; on return it is
		//         positioned on the first following node whose depth equals
		//         'depth' (normally the matching </RDF> end tag).
		// depth:  depth of the <RDF> element, used to detect the end of the block.
		//
		// Elements named in rdf_grab_strings yield "dc:<name>" from their own
		// text (dates are stored as date properties when they can be parsed).
		// Elements named in rdf_nongrab_strings yield "dc:<name>" from the text
		// of a nested <Agent> element.  Any other text is appended as content.
		// Malformed XML is logged and reported through Error ().
		protected void PullRdfProperties (XmlTextReader reader, int depth)
		{
			// Index into rdf_grab_strings / rdf_nongrab_strings of the element
			// currently open, or -1 when there is none.
			int grab_mode = -1, nongrab_mode = -1;
			bool grab_text = false, ignore_text = false, agent_mode = false;

			string text = "";

			try {
				while (reader.Read ()) {
					// Back at the depth of <RDF> itself: the block is finished.
					if (depth == reader.Depth)
						return;

					switch (reader.NodeType) {
					case XmlNodeType.Element:
						if (grab_text)
							break;

						if (ArrayFu.IndexOfString (ignore_strings, reader.LocalName) != -1)
							ignore_text = true;
						else if (reader.LocalName == "Agent")
							grab_text = agent_mode = true;
						else {
							for (int i = 0; i < rdf_grab_strings.Length; i++) {
								if (reader.LocalName == rdf_grab_strings [i]) {
									grab_text = true;
									grab_mode = i;
									break;
								}
							}

							// Bound by the array's own length: it has one more
							// entry than RdfGrabModes.Num, so using that bound
							// skipped "rights".
							for (int i = 0; i < rdf_nongrab_strings.Length; i++) {
								if (reader.LocalName == rdf_nongrab_strings [i]) {
									grab_text = false;
									nongrab_mode = i;
									break;
								}
							}
						}
						break;

					case XmlNodeType.Text:
						text = reader.Value.Trim ();
						if (text.Length == 0 || ignore_text)
							break;

						if (grab_text) {
							sb.Append (text);
						} else {
							AppendText (text);
							AppendStructuralBreak ();
						}
						break;

					case XmlNodeType.EndElement:
						ignore_text = false;

						if (agent_mode) {
							if (reader.LocalName == "Agent") {
								agent_mode = grab_text = false;

								if (nongrab_mode >= 0) {
									AddProperty (Property.New ("dc:" + rdf_nongrab_strings [nongrab_mode], sb.ToString ()));
								} else if (sb.Length > 0) {
									// <Agent> outside any known container:
									// there is no property name to use, so keep
									// the text as plain content rather than
									// indexing the array with -1.
									AppendText (sb.ToString ());
									AppendStructuralBreak ();
								}

								sb.Length = 0;
							}
						} else if (grab_mode >= 0 && reader.LocalName == rdf_grab_strings [grab_mode]) {
							if (grab_mode == (int) RdfGrabModes.DateMode) {
								try {
									AddProperty (Property.NewDate ("dc:date", System.Convert.ToDateTime (sb.ToString ())));
								} catch (FormatException) {
									// Not a parseable date: keep the raw text.
									AddProperty (Property.New ("dc:date", sb.ToString ()));
								}
							} else {
								AddProperty (Property.New ("dc:" + rdf_grab_strings [grab_mode], sb.ToString ()));
							}

							sb.Length = 0;
							grab_mode = -1;
							grab_text = false;
						} else if (nongrab_mode >= 0 && reader.LocalName == rdf_nongrab_strings [nongrab_mode]) {
							nongrab_mode = -1;
							grab_text = false;
						}
						break;
					}
				}
			} catch (System.Xml.XmlException e) {
				Logger.Log.Error ("error parsing embedded RDF {0}", FileInfo.FullName);
				Logger.Log.Debug (e);
				Error ();
			}
		}
	}
}