Tokenize 001234 as 1234. Include a testing function in NoiseFilter to figure out...
[beagle.git] / beagled / MonodocQueryable / MonodocQueryable.cs
blob b945839ffd49a47adf0d4903ce1515bd42192196
1 //
2 // MonodocQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
6 // Authors:
7 // Fredrik Hedberg (fredrik.hedberg@avafan.com)
8 //
11 // Permission is hereby granted, free of charge, to any person obtaining a
12 // copy of this software and associated documentation files (the "Software"),
13 // to deal in the Software without restriction, including without limitation
14 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 // and/or sell copies of the Software, and to permit persons to whom the
16 // Software is furnished to do so, subject to the following conditions:
18 // The above copyright notice and this permission notice shall be included in
19 // all copies or substantial portions of the Software.
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 // DEALINGS IN THE SOFTWARE.
30 using System;
31 using System.IO;
32 using System.Xml;
33 using System.Text;
34 using System.Threading;
36 using Beagle.Daemon;
37 using Beagle.Util;
39 using ICSharpCode.SharpZipLib.Zip;
41 namespace Beagle.Daemon.MonodocQueryable {
43 [QueryableFlavor (Name="Monodoc", Domain=QueryDomain.Local, RequireInotify=false)]
44 public class MonodocQueryable : LuceneQueryable {
46 private static Logger log = Logger.Get ("MonodocQueryable");
48 string monodoc_dir;
49 int monodoc_wd;
/// <summary>
/// Creates the Monodoc backend, passing the index name "MonodocIndex" to
/// the base class and recording the directory that is scanned for
/// Monodoc source archives.
/// </summary>
public MonodocQueryable () : base ("MonodocIndex")
{
	// FIXME Make use of autoconf
	this.monodoc_dir = "/usr/lib/monodoc/sources";
}
56 /////////////////////////////////////////////
/// <summary>
/// Starts the backend. When the Monodoc sources directory does not exist
/// nothing is started at all; otherwise the base class is started and
/// StartWorker is handed to ExceptionHandlingThread.Start.
/// </summary>
public override void Start ()
{
	bool have_sources = Directory.Exists (monodoc_dir);

	if (have_sources) {
		base.Start ();

		ThreadStart worker = new ThreadStart (StartWorker);
		ExceptionHandlingThread.Start (worker);
	}
}
/// <summary>
/// Performs the initial scan of the Monodoc sources directory: registers
/// for change notifications, indexes every *.zip file found there and
/// logs the number of sources and types that were picked up.
/// </summary>
private void StartWorker ()
{
	log.Info ("Scanning Monodoc sources");

	Stopwatch timer = new Stopwatch ();
	timer.Start ();

	DirectoryInfo root = new DirectoryInfo (monodoc_dir);

	if (!Inotify.Enabled) {
		// Without inotify, watch the directory for new or changed zip files.
		// NOTE(review): the watcher is referenced only by this local;
		// confirm it stays alive once this method returns.
		FileSystemWatcher watcher = new FileSystemWatcher ();
		watcher.Path = monodoc_dir;
		watcher.Filter = "*.zip";

		FileSystemEventHandler handler = new FileSystemEventHandler (OnChangedEvent);
		watcher.Changed += handler;
		watcher.Created += handler;

		watcher.EnableRaisingEvents = true;
	} else {
		// Remember the watch descriptor so OnInotifyEvent can filter on it.
		monodoc_wd = Inotify.Watch (root.FullName, Inotify.EventType.CloseWrite | Inotify.EventType.CreateFile);
		Inotify.Event += OnInotifyEvent;
	}

	int foundSources = 0;
	int foundTypes = 0;

	FileInfo[] archives = root.GetFiles ("*.zip");
	for (int i = 0; i < archives.Length; ++i) {
		int result = IndexArchive (archives [i], Scheduler.Priority.Delayed);

		// -1 means the archive was skipped by IndexArchive.
		if (result == -1)
			continue;

		++foundSources;
		foundTypes += result;
	}

	timer.Stop ();
	log.Info ("Found {0} types in {1} Monodoc sources in {2}", foundTypes, foundSources, timer);
}
105 /////////////////////////////////////////////
/// <summary>
/// Inotify callback. Re-indexes a zip archive when a CloseWrite or
/// CreateFile event arrives for the watched Monodoc directory.
/// </summary>
/// <param name="wd">Watch descriptor of the event; must equal monodoc_wd.</param>
/// <param name="path">Directory the event refers to.</param>
/// <param name="subitem">Name of the affected item inside <paramref name="path"/>.</param>
/// <param name="srcpath">Not used by this handler.</param>
/// <param name="type">Kind of inotify event.</param>
private void OnInotifyEvent (int wd,
			     string path,
			     string subitem,
			     string srcpath,
			     Inotify.EventType type)
{
	// Only named *.zip items under our own watch are of interest.
	bool relevant = wd == monodoc_wd
		&& subitem != ""
		&& Path.GetExtension (subitem) == ".zip";

	if (!relevant)
		return;

	string full_path = Path.Combine (path, subitem);

	if (type == Inotify.EventType.CloseWrite || type == Inotify.EventType.CreateFile)
		IndexArchive (new FileInfo (full_path), Scheduler.Priority.Delayed);
}
/// <summary>
/// FileSystemWatcher callback: indexes the file the event refers to with
/// delayed priority.
/// </summary>
/// <param name="o">Event sender; not used.</param>
/// <param name="args">Event data carrying the full path of the file.</param>
private void OnChangedEvent (object o, FileSystemEventArgs args)
{
	FileInfo changed = new FileInfo (args.FullPath);
	IndexArchive (changed, Scheduler.Priority.Delayed);
}
137 /////////////////////////////////////////////
/// <summary>
/// Schedules add tasks for every type, and every member of each type,
/// found in a Monodoc source archive.
/// </summary>
/// <param name="file">The zip archive to scan.</param>
/// <param name="priority">Priority given to every scheduled add task.</param>
/// <returns>
/// The number of types scheduled, or -1 when the file attributes store
/// reports the archive as up to date and nothing was scanned.
/// </returns>
int IndexArchive (FileInfo file, Scheduler.Priority priority)
{
	if (this.FileAttributesStore.IsUpToDate (file.FullName))
		return -1;

	log.Debug ("Scanning Monodoc source file " + file);

	Scheduler.TaskGroup group = NewMarkingTaskGroup (file.FullName, file.LastWriteTime);

	int countTypes = 0;

	// Open by full path, the same path used for the up-to-date check and
	// the task group above.
	ZipFile archive = new ZipFile (file.FullName);

	try {
		foreach (ZipEntry entry in archive) {
			// Entries whose name contains a dot are skipped.
			if (entry.Name.IndexOf (".") != -1)
				continue;

			XmlDocument document = new XmlDocument ();

			// The document is fully loaded here, so the entry stream can
			// be released immediately.
			using (Stream input = archive.GetInputStream (entry)) {
				document.Load (input);
			}

			XmlNode type = document.SelectSingleNode ("/Type");

			if (type == null)
				continue;

			ScheduleAddTask (TypeNodeToIndexable (type, file), priority, group);

			string typeName = type.Attributes["FullName"].Value;

			foreach (XmlNode member in type.SelectNodes ("Members/Member"))
				ScheduleAddTask (MemberNodeToIndexable (member, file, typeName), priority, group);

			countTypes++;
		}
	} finally {
		// Always release the archive's file handle, even if an entry
		// fails to parse.
		archive.Close ();
	}

	return countTypes;
}

// Wraps an indexable in an add task with the given priority, attaches it
// to the task group and hands it to the scheduler.
private void ScheduleAddTask (Indexable indexable, Scheduler.Priority priority, Scheduler.TaskGroup group)
{
	Scheduler.Task task = NewAddTask (indexable);
	task.Priority = priority;
	task.SubPriority = 0;
	task.AddTaskGroup (group);
	ThisScheduler.Add (task);
}
/// <summary>
/// Builds the indexable for a Monodoc type node.
/// </summary>
/// <param name="node">Type node; its FullName attribute and Docs child are read.</param>
/// <param name="file">Archive the node came from; used in the indexable's URI.</param>
/// <returns>An indexable whose text is the inner XML of the Docs element.</returns>
Indexable TypeNodeToIndexable(XmlNode node,FileInfo file)
{
	string fullName = node.Attributes["FullName"].Value;
	string itemName = "T:" + fullName;

	Uri uri = new Uri ("monodoc:///" + file + ";item=" + itemName);
	Indexable indexable = new Indexable (uri);

	indexable.MimeType = "application/monodoc";
	indexable.Type = "Monodoc";

	indexable.AddProperty (Property.NewUnsearched ("fixme:type", "type"));
	indexable.AddProperty (Property.NewUnsearched ("fixme:name", itemName));

	// The full name is also stored as space-separated FuzzySplit pieces.
	string[] pieces = StringFu.FuzzySplit (fullName);
	indexable.AddProperty (Property.NewKeyword ("fixme:splitname", String.Join (" ", pieces)));

	// Should we add other stuff here? Implemented interfaces etc?

	XmlNode docs = node.SelectSingleNode ("Docs");
	indexable.SetTextReader (new StringReader (docs.InnerXml));

	return indexable;
}
/// <summary>
/// Builds the indexable for a member node of a Monodoc type.
/// </summary>
/// <param name="node">
/// Member node; its MemberName attribute and its MemberType, Parameters
/// and Docs children are read.
/// </param>
/// <param name="file">Archive the node came from; used in the indexable's URI.</param>
/// <param name="parentName">Full name of the type that declares the member.</param>
/// <returns>An indexable whose text is the inner XML of the Docs element.</returns>
Indexable MemberNodeToIndexable(XmlNode node, FileInfo file, string parentName)
{
	string memberTypeName = node.SelectSingleNode ("MemberType").InnerText;
	char memberType = MemberTypeToChar (memberTypeName);

	// Name layout: "<kind>:<parent>[.<member>][(<type>,<type>,...)]"
	StringBuilder memberFullName = new StringBuilder ();
	memberFullName.Append (memberType);
	memberFullName.Append (":");
	memberFullName.Append (parentName);

	// Constructors carry no member name of their own.
	if (memberType != 'C') {
		string memberName = node.Attributes["MemberName"].Value;
		memberFullName.Append (".");
		memberFullName.Append (memberName);
	}

	bool hasParameterList = (memberType == 'C' || memberType == 'M' || memberType == 'E');
	if (hasParameterList) {
		memberFullName.Append ("(");

		string separator = "";
		foreach (XmlNode parameter in node.SelectNodes ("Parameters/Parameter")) {
			memberFullName.Append (separator);
			memberFullName.Append (parameter.Attributes["Type"].Value);
			separator = ",";
		}

		memberFullName.Append (")");
	}

	string fullName = memberFullName.ToString ();

	Uri uri = new Uri ("monodoc:///" + file + ";item=" + fullName);
	Indexable indexable = new Indexable (uri);

	indexable.MimeType = "application/monodoc";
	indexable.Type = "Monodoc";

	indexable.AddProperty (Property.NewUnsearched ("fixme:type", memberTypeName.ToLower ()));

	// NOTE(review): the StringBuilder itself is passed here, and via
	// Property.New, whereas TypeNodeToIndexable stores "fixme:name" with
	// Property.NewUnsearched - confirm which is intended.
	indexable.AddProperty (Property.New ("fixme:name",memberFullName));

	// Drop the two-character "<kind>:" prefix and any parameter list
	// before splitting the name.
	int paren = fullName.IndexOf ("(");
	string bareName;
	if (paren != -1)
		bareName = fullName.Substring (2, paren - 2);
	else
		bareName = fullName.Substring (2);

	string splitname = String.Join (" ", StringFu.FuzzySplit (bareName));
	indexable.AddProperty (Property.NewKeyword ("fixme:splitname",splitname));

	XmlNode docs = node.SelectSingleNode ("Docs");
	indexable.SetTextReader (new StringReader (docs.InnerXml));

	return indexable;
}
/// <summary>
/// Maps a Monodoc member type name to its one-letter prefix.
/// </summary>
/// <param name="memberType">
/// Member type name: "Constructor", "Event", "Property", "Field" or "Method".
/// </param>
/// <returns>
/// 'C', 'E', 'P', 'F' or 'M' for the names above; 'U' for anything else.
/// </returns>
char MemberTypeToChar (string memberType)
{
	if (memberType == "Constructor")
		return 'C';
	if (memberType == "Event")
		return 'E';
	if (memberType == "Property")
		return 'P';
	if (memberType == "Field")
		return 'F';
	if (memberType == "Method")
		return 'M';

	return 'U';
}