Oops, fix a broken part of the patch
[beagle.git] / beagled / IndexingServiceQueryable / IndexingServiceQueryable.cs
blobb2d711f3d20f6d96ffc4792e973966fa579a3be1
1 //
2 // IndexingServiceQueryable.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 // The IndexingService has two modes of operation: one is through the standard
28 // message-passing system and one where a slightly-structured file is dropped
29 // into a known location on the filesystem.
31 // (1) Messaging: An IndexingServiceRequest message is sent containing URIs of
32 // items to remove and Indexables to add. This is more reliable, and is best
33 // for clients which will also be utilizing Beagle for searching.
35 // (2) Files: The file to be indexed is dropped into the ~/.beagle/ToIndex
36 // directory. Another file with the same name prepended with a period is
37 // also dropped into the directory. In that file is the metadata for the
38 // file being indexed. The first line is the URI of the data being indexed.
39 // The second line is the hit type. The third line is the mime type. Then
40 // there are zero or more properties in the form "type:key=value", where
41 // "type" is either 't' for text or 'k' for keyword. This method is a lot
42 // easier to use, but requires that Beagle have inotify support enabled to
43 // work.
45 using System;
46 using System.Collections;
47 using System.IO;
49 using Beagle.Daemon;
50 using Beagle.Util;
namespace Beagle.Daemon.IndexingServiceQueryable {

	// Queryable backend for the indexing service.  Content reaches it either
	// as IndexingServiceRequest messages (see HandleMessage) or as data files
	// paired with a "." + name metadata file in the ToIndex directory under
	// PathFinder.StorageDir (see Start, OnInotifyEvent and IndexFile).
	[QueryableFlavor (Name="IndexingService", Domain=QueryDomain.Local, RequireInotify=false)]
	public class IndexingServiceQueryable : LuceneQueryable {

		// Creates the queryable on top of the "IndexingServiceIndex" index and
		// registers HandleMessage as the handler for IndexingServiceRequest
		// messages.
		public IndexingServiceQueryable () : base ("IndexingServiceIndex")
		{
			Server.RegisterRequestMessageHandler (typeof (IndexingServiceRequest), new Server.RequestMessageHandler (HandleMessage));
		}

		// Makes sure the ToIndex directory exists, subscribes to CloseWrite
		// inotify events on it (when inotify is enabled), and then indexes
		// every data file already present that has a metadata companion.
		public override void Start ()
		{
			string index_path = Path.Combine (PathFinder.StorageDir, "ToIndex");

			if (!Directory.Exists (index_path))
				Directory.CreateDirectory (index_path);

			// The subscription is set up before the scan below; presumably
			// so that files dropped while we are scanning are not missed.
			if (Inotify.Enabled)
				Inotify.Subscribe (index_path, OnInotifyEvent, Inotify.EventType.CloseWrite);

			Logger.Log.Info ("Scanning for files in the IndexingService directory...");
			Stopwatch stopwatch = new Stopwatch ();
			stopwatch.Start ();
			int count = 0;

			State = QueryableState.Crawling;
			try {
				foreach (FileInfo file in DirectoryWalker.GetFileInfos (index_path)) {
					// Dot-files are the metadata companions, not data.
					if (file.Name[0] == '.')
						continue;

					// Only index data files whose metadata file is present.
					if (File.Exists (Path.Combine (file.DirectoryName, "." + file.Name))) {
						IndexFile (file, Scheduler.Priority.Delayed);
						count++;
					}
				}
			} finally {
				// Never leave the backend reported as crawling, even
				// if the scan is cut short by an exception.
				State = QueryableState.Idle;
			}

			stopwatch.Stop ();
			Logger.Log.Info ("Indexed {0} Indexing Service items in {1}", count, stopwatch);
		}

		// Inotify callback for the ToIndex directory.  Whichever half of a
		// data/metadata pair triggered the event, the data file is indexed
		// only if the other half already exists.
		private void OnInotifyEvent (Inotify.Watch watch,
					     string path,
					     string subitem,
					     string srcpath,
					     Inotify.EventType type)
		{
			if (subitem == "")
				return;

			if (subitem[0] == '.') {
				// A metadata file: the data file has the same name
				// without the leading period.
				string data_file = Path.Combine (path, subitem.Substring (1));

				if (File.Exists (data_file))
					IndexFile (new FileInfo (data_file), Scheduler.Priority.Immediate);
			} else {
				// A data file: look for its metadata companion.
				string meta_file = Path.Combine (path, "." + subitem);

				if (File.Exists (meta_file))
					IndexFile (new FileInfo (Path.Combine (path, subitem)), Scheduler.Priority.Immediate);
			}
		}

		// Reads the metadata companion of data_file, builds an Indexable from
		// it, deletes the metadata file and schedules an add task with the
		// given priority.  Does nothing if the metadata file has vanished or
		// cannot be parsed; in the latter case a warning is logged and the
		// metadata file is left in place.
		private void IndexFile (FileInfo data_file, Scheduler.Priority priority)
		{
			FileInfo meta_file = new FileInfo (Path.Combine (data_file.DirectoryName, "." + data_file.Name));
			FileStream meta_stream;

			try {
				meta_stream = meta_file.Open (FileMode.Open, FileAccess.Read, FileShare.Read);
			} catch (FileNotFoundException) {
				// The meta file disappeared before we could
				// open it.
				return;
			}

			Indexable indexable;

			// Disposing the reader closes meta_stream as well, so the
			// file handle is released on every path, including when
			// reading or parsing throws.
			using (StreamReader reader = new StreamReader (meta_stream))
				indexable = ReadIndexable (reader, meta_file, data_file);

			if (indexable == null)
				return;

			// Ok, we're finished with the meta file.  It is closed by
			// now, so we can delete it.
			meta_file.Delete ();

			Scheduler.Task task = NewAddTask (indexable);
			task.Priority = priority;
			ThisScheduler.Add (task);
		}

		// Parses the metadata read from reader into an Indexable describing
		// data_file.  Returns null (after logging a warning) if the URI line
		// cannot be parsed or the file ends before the mime type line.
		// Malformed property lines are logged and skipped.
		private static Indexable ReadIndexable (TextReader reader, FileInfo meta_file, FileInfo data_file)
		{
			// First line of the file is a URI
			string line = reader.ReadLine ();
			Uri uri;

			try {
				// A null line (empty file) also ends up in the catch.
				uri = new Uri (line);
			} catch (Exception e) {
				Logger.Log.Warn ("IndexingService: Unable to parse URI in {0}: {1}",
						 meta_file.FullName, e.Message);
				return null;
			}

			Indexable indexable = new Indexable (uri);
			indexable.Timestamp = data_file.LastWriteTimeUtc;
			indexable.ContentUri = UriFu.PathToFileUri (data_file.FullName);
			indexable.DeleteContent = true;

			// Second line is the hit type
			line = reader.ReadLine ();
			if (line == null) {
				Logger.Log.Warn ("IndexingService: EOF reached trying to read hit type from {0}",
						 meta_file.FullName);
				return null;
			} else if (line != String.Empty)
				indexable.HitType = line;

			// Third line is the mime type
			line = reader.ReadLine ();
			if (line == null) {
				Logger.Log.Warn ("IndexingService: EOF reached trying to read mime type from {0}",
						 meta_file.FullName);
				return null;
			} else if (line != String.Empty)
				indexable.MimeType = line;

			// Following lines are properties in "t:key=value" format
			while ((line = reader.ReadLine ()) != null) {
				if (line == String.Empty)
					continue;

				bool keyword = false;

				if (line[0] == 'k')
					keyword = true;
				else if (line[0] != 't') {
					Logger.Log.Warn ("IndexingService: Unknown property type: '{0}'", line[0]);
					continue;
				}

				int i = line.IndexOf ('=');

				// The key starts at index 2, after the "t:" or "k:"
				// prefix.  A missing '=' (i == -1) or one that comes
				// before index 2 would make the Substring call below
				// throw, so reject the line instead.
				if (i < 2) {
					Logger.Log.Warn ("IndexingService: Unknown property line: '{0}'", line);
					continue;
				}

				string key = line.Substring (2, i - 2);
				string value = line.Substring (i + 1);

				// FIXME: We should probably handle date types
				if (keyword)
					indexable.AddProperty (Property.NewUnsearched (key, value));
				else
					indexable.AddProperty (Property.New (key, value));
			}

			return indexable;
		}

		// Hands the Indexables of a request to the indexer one at a time
		// and reports progress through StatusName.
		private class IndexableGenerator : IIndexableGenerator {
			private IEnumerator to_add_enumerator;
			private int count, done_count = 0;

			public IndexableGenerator (ICollection to_add)
			{
				this.count = to_add.Count;
				this.to_add_enumerator = to_add.GetEnumerator ();
			}

			// Returns the item the enumerator is positioned on, or null
			// if that item is not an Indexable.
			public Indexable GetNextIndexable ()
			{
				return to_add_enumerator.Current as Indexable;
			}

			// Advances to the next item; returns false once the
			// collection is exhausted.
			public bool HasNextIndexable ()
			{
				if (!to_add_enumerator.MoveNext ())
					return false;

				// Count only successful advances so that the status
				// never reads "N+1 of N" after the final call.
				++done_count;
				return true;
			}

			public string StatusName {
				get { return String.Format ("IndexingService: {0} of {1}", done_count, count); }
			}

			// Nothing to do after a flush.
			public void PostFlushHook ()
			{ }
		}

		// Handles an IndexingServiceRequest: schedules one remove task per
		// URI in ToRemove and, if ToAdd is non-empty, a single add task that
		// walks all the Indexables in it.  Always returns an EmptyResponse.
		private ResponseMessage HandleMessage (RequestMessage msg)
		{
			IndexingServiceRequest isr = (IndexingServiceRequest) msg;

			foreach (Uri uri in isr.ToRemove) {
				Scheduler.Task task = NewRemoveTask (uri);
				ThisScheduler.Add (task);
			}

			// FIXME: There should be a way for the request to control the
			// scheduler priority of the task.

			if (isr.ToAdd.Count > 0) {
				IIndexableGenerator ind_gen = new IndexableGenerator (isr.ToAdd);
				Scheduler.Task task = NewAddTask (ind_gen);
				task.Priority = Scheduler.Priority.Immediate;
				ThisScheduler.Add (task);
			}

			// FIXME: There should be an asynchronous response (fired by a Scheduler.Hook)
			// that fires when all of the items have been added to the index.

			// No response
			return new EmptyResponse ();
		}
	}
}