2 // IndexingServiceQueryable.cs
4 // Copyright (C) 2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 // The IndexingService has two modes of operation: one is through the standard
28 // message-passing system and one where a slightly-structured file is dropped
29 // into a known location on the filesystem.
31 // (1) Messaging: An IndexingServiceRequest message is sent containing URIs of
32 // items to remove and Indexables to add. This is more reliable, and is best
33 // for clients which will also be utilizing Beagle for searching.
35 // (2) Files: The file to be indexed is dropped into the ~/.beagle/ToIndex
36 // directory. Another file with the same name prepended with a period is
37 // also dropped into the directory. In that file is the metadata for the
38 // file being indexed. The first line is the URI of the data being indexed.
39 // The second line is the hit type. The third line is the mime type. Then
40 // there are zero or more properties in the form "type:key=value", where
41 // "type" is either 't' for text or 'k' for keyword. This method is a lot
42 // easier to use, but requires that Beagle have inotify support enabled to
// work.
46 using System
.Collections
;
48 using System
.Threading
;
53 namespace Beagle
.Daemon
.IndexingServiceQueryable
{
55 [QueryableFlavor (Name
="IndexingService", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
56 public class IndexingServiceQueryable
: LuceneQueryable
{
// Builds the queryable on top of the "IndexingServiceIndex" index and hooks
// HandleMessage up as the handler for incoming IndexingServiceRequest
// messages.
public IndexingServiceQueryable () : base ("IndexingServiceIndex")
{
	Server.RequestMessageHandler request_handler =
		new Server.RequestMessageHandler (HandleMessage);

	Server.RegisterRequestMessageHandler (typeof (IndexingServiceRequest),
					      request_handler);
}
// Entry point used to start this backend.  The only statement visible here
// wraps StartWorker in a ThreadStart delegate and hands it to
// ExceptionHandlingThread.Start (presumably so the directory setup and the
// initial crawl run on their own thread -- confirm against
// ExceptionHandlingThread).
// NOTE(review): the lines between the signature and that call are elided in
// this view; confirm whether base.Start () is invoked there.
63 public override void Start ()
67 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker passed to ExceptionHandlingThread.Start by Start (): prepares the
// drop directory and queues the initial crawl.
//   1. Makes sure the "ToIndex" directory under PathFinder.StorageDir exists.
//   2. Subscribes OnInotifyEvent to CloseWrite events on that directory.
//   3. Wraps GetIndexables (index_path) in an IndexableGenerator and adds the
//      resulting task, tagged "IndexingService initial crawl", to
//      ThisScheduler.
// State is Crawling only while the task is being built and added; it is set
// back to Idle immediately after ThisScheduler.Add returns.
// NOTE(review): a line is elided in this view just before the
// Inotify.Subscribe call.  The class attribute declares RequireInotify=false,
// so confirm that the subscription is guarded on systems without inotify.
70 private void StartWorker ()
72 string index_path
= Path
.Combine (PathFinder
.StorageDir
, "ToIndex");
// Create the drop directory on first use.
74 if (!Directory
.Exists (index_path
))
75 Directory
.CreateDirectory (index_path
);
// Watch for files that have been completely written (CloseWrite).
78 Inotify
.Subscribe (index_path
, OnInotifyEvent
, Inotify
.EventType
.CloseWrite
);
80 Logger
.Log
.Info ("Setting up an initial crawl of the IndexingService directory");
82 State
= QueryableState
.Crawling
;
// Pick up files that were dropped before the watch above was in place.
84 IndexableGenerator generator
= new IndexableGenerator (GetIndexables (index_path
));
85 Scheduler
.Task task
= NewAddTask (generator
);
86 task
.Tag
= "IndexingService initial crawl";
87 ThisScheduler
.Add (task
);
89 State
= QueryableState
.Idle
;
// Lazily enumerates an Indexable for every data file directly reported by
// DirectoryWalker.GetFileInfos (path) that has a companion metadata file
// (same directory, same name prefixed with '.').
//
// Files whose name starts with '.' are the metadata files themselves and are
// never yielded.  Data files for which FileToIndexable () returns null (the
// metadata vanished or could not be parsed) are skipped rather than yielded
// as null, mirroring the null check in IndexFile ().
private IEnumerable GetIndexables (string path)
{
	foreach (FileInfo file in DirectoryWalker.GetFileInfos (path)) {
		// Dot-files are metadata, not data.
		if (file.Name [0] == '.')
			continue;

		string meta_path = Path.Combine (file.DirectoryName, "." + file.Name);
		if (! File.Exists (meta_path))
			continue;

		// The meta file can still disappear or be malformed between the
		// check above and the read, in which case we get null back.
		Indexable indexable = FileToIndexable (file);
		if (indexable != null)
			yield return indexable;
	}
}
// Builds an Indexable for `data_file` from its companion metadata file (same
// directory, same name prefixed with '.').
//
// Metadata layout, one item per line:
//   1.  URI of the item being indexed (required)
//   2.  hit type  (an empty line leaves HitType unset)
//   3.  mime type (an empty line leaves MimeType unset)
//   4+. zero or more "type:key=value" properties; empty lines are skipped
//
// The returned Indexable uses the data file as its content (ContentUri), has
// DeleteContent set, and carries the metadata FileInfo in
// LocalState ["MetaFile"] so that PostAddHook () can delete it later.
//
// Returns null when the metadata file cannot be found, when the URI line
// cannot be parsed, or when the file ends before the hit type or mime type
// line.  The metadata stream is closed on every exit path.
private Indexable FileToIndexable (FileInfo data_file)
{
	FileInfo meta_file = new FileInfo (Path.Combine (data_file.DirectoryName, "." + data_file.Name));
	FileStream meta_stream;

	try {
		meta_stream = meta_file.Open (FileMode.Open, FileAccess.Read, FileShare.Read);
	} catch (FileNotFoundException) {
		// The meta file disappeared before we could open it.
		return null;
	} catch (DirectoryNotFoundException) {
		// The containing directory disappeared; same outcome.
		return null;
	}

	// Disposing the reader also closes meta_stream, including when one of
	// the parsing steps below throws.
	using (StreamReader reader = new StreamReader (meta_stream)) {
		// First line of the file is a URI
		string line = reader.ReadLine ();
		Uri uri;

		try {
			uri = new Uri (line);
		} catch (Exception e) {
			// Also covers an empty file, where line is null.
			Logger.Log.Warn (e, "IndexingService: Unable to parse URI in {0}:", meta_file.FullName);
			return null;
		}

		Indexable indexable = new Indexable (uri);
		indexable.Timestamp = data_file.LastWriteTimeUtc;
		indexable.ContentUri = UriFu.PathToFileUri (data_file.FullName);
		indexable.DeleteContent = true;

		// Second line is the hit type
		line = reader.ReadLine ();
		if (line == null) {
			Logger.Log.Warn ("IndexingService: EOF reached trying to read hit type from {0}",
					 meta_file.FullName);
			return null;
		} else if (line != String.Empty)
			indexable.HitType = line;

		// Third line is the mime type
		line = reader.ReadLine ();
		if (line == null) {
			Logger.Log.Warn ("IndexingService: EOF reached trying to read mime type from {0}",
					 meta_file.FullName);
			return null;
		} else if (line != String.Empty)
			indexable.MimeType = line;

		// Following lines are properties in "t:key=value" format
		while ((line = reader.ReadLine ()) != null) {
			if (line != String.Empty)
				AddPropertyFromLine (indexable, line);
		}

		indexable.LocalState ["MetaFile"] = meta_file;

		// Ok, we're finished with the meta file.  It will be
		// deleted in PostAddHook ().
		return indexable;
	}
}

// Parses one non-empty "type:key=value" metadata line and adds the resulting
// property to `indexable`.  Type 'k' (keyword) is added through
// Property.NewUnsearched, type 't' (text) through Property.New.  Lines with
// any other type character, or without a '=' at or after index 2, are
// logged and ignored.
private void AddPropertyFromLine (Indexable indexable, string line)
{
	bool keyword;

	if (line [0] == 'k')
		keyword = true;
	else if (line [0] == 't')
		keyword = false;
	else {
		Logger.Log.Warn ("IndexingService: Unknown property type: '{0}'", line [0]);
		return;
	}

	int i = line.IndexOf ('=');

	// The key starts at index 2 (after the type character and separator),
	// so a missing '=' or one before that position cannot be sliced:
	// Substring (2, i - 2) would be asked for a negative length.
	if (i < 2) {
		Logger.Log.Warn ("IndexingService: Unknown property line: '{0}'", line);
		return;
	}

	string key = line.Substring (2, i - 2);
	string value = line.Substring (i + 1);

	// FIXME: We should probably handle date types
	if (keyword)
		indexable.AddProperty (Property.NewUnsearched (key, value));
	else
		indexable.AddProperty (Property.New (key, value));
}
198 // Bleh, we need to keep around a list of pending items to be
199 // indexed so that we don't actually index it twice because
200 // the order of the creation of the data file and meta file
// is not defined: OnInotifyEvent () may see either of the two first.
// Entries are the data file paths added by OnInotifyEvent () under
// lock (pending_files) and removed again in PostAddHook ().
202 private ArrayList pending_files
= new ArrayList ();
// Inotify callback; StartWorker () subscribes it for CloseWrite events on the
// ToIndex directory.  A data file is indexed once both it and its ".name"
// metadata companion exist, whichever of the two was written last, and
// pending_files keeps the same data file from being submitted twice.
// NOTE(review): `path` and `subitem`, used below, are declared on parameter
// lines that are not visible in this view.
204 private void OnInotifyEvent (Inotify
.Watch watch
,
208 Inotify
.EventType type
)
// NOTE(review): subitem [0] throws on an empty string; a guard may sit on the
// elided lines just above -- confirm.
// A leading '.' marks the metadata file: the data file has the same name
// without the dot.  Index it only if that data file already exists and is
// not already pending.
213 if (subitem
[0] == '.') {
214 string data_file
= Path
.Combine (path
, subitem
.Substring (1));
// Check-and-add is done under the lock so two events cannot both submit.
216 lock (pending_files
) {
217 if (File
.Exists (data_file
) && ! pending_files
.Contains (data_file
)) {
218 pending_files
.Add (data_file
);
219 IndexFile (new FileInfo (data_file
));
// Otherwise (presumably the else-branch; the separator line is elided in this
// view) the event is for the data file itself: index it only if its
// metadata companion exists and it is not already pending.
223 string meta_file
= Path
.Combine (path
, "." + subitem
);
224 string data_file
= Path
.Combine (path
, subitem
);
226 lock (pending_files
) {
227 if (File
.Exists (meta_file
) && ! pending_files
.Contains (data_file
)) {
228 pending_files
.Add (data_file
);
229 IndexFile (new FileInfo (data_file
));
// Converts `data_file` (plus its metadata companion) into an Indexable and
// schedules it for addition with Immediate priority.
//
// When FileToIndexable () returns null nothing is scheduled, and the path is
// dropped from pending_files again.  PostAddHook () is the only other place
// that removes entries and it never runs for a file that was not scheduled,
// so without this the file could never be retried by a later inotify event.
private void IndexFile (FileInfo data_file)
{
	Indexable indexable = FileToIndexable (data_file);

	if (indexable == null) {
		// The file disappeared or its metadata was unusable.
		// OnInotifyEvent () calls us while holding this lock; lock is
		// re-entrant, so taking it again here is safe.
		// Assumes the inotify path is absolute, so that FullName matches
		// the string stored by OnInotifyEvent () -- PostAddHook () relies
		// on the same equivalence via ContentUri.LocalPath.
		lock (pending_files)
			pending_files.Remove (data_file.FullName);
		return;
	}

	Scheduler.Task task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (task);
}
// Runs after `indexable` has been added to the index.  For indexables built
// by FileToIndexable () this deletes the metadata file stored under
// LocalState ["MetaFile"] and removes the data file path from pending_files
// so the same path can be indexed again later.
//
// Indexables without a "MetaFile" entry are left alone.  FileToIndexable ()
// is the only place in this class that sets the entry, so items arriving
// through HandleMessage () presumably never carry one -- verify against
// Indexable.LocalState.
protected override void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	FileInfo meta_file = indexable.LocalState ["MetaFile"] as FileInfo;
	if (meta_file == null)
		return;

	// FileInfo.Delete () does nothing if the file is already gone.
	meta_file.Delete ();

	// Same lock as the check-and-add in OnInotifyEvent ().
	lock (pending_files)
		pending_files.Remove (indexable.ContentUri.LocalPath);
}
// Adapts an enumeration of Indexables to the IIndexableGenerator interface
// and reports progress through StatusName.
private class IndexableGenerator : IIndexableGenerator {
	private IEnumerator to_add_enumerator;

	// count is the total number of items when it is known up front (the
	// ICollection constructor) and -1 otherwise; done_count is the number
	// of items handed out so far.
	private int count = -1, done_count = 0;

	// Wraps a sequence of unknown length.
	public IndexableGenerator (IEnumerable to_add)
	{
		this.to_add_enumerator = to_add.GetEnumerator ();
	}

	// Wraps a collection and records its size for progress reporting.
	public IndexableGenerator (ICollection to_add) : this ((IEnumerable) to_add)
	{
		this.count = to_add.Count;
	}

	// Returns the item selected by the last successful HasNextIndexable ()
	// call, or null if the current element is not an Indexable.
	public Indexable GetNextIndexable ()
	{
		return to_add_enumerator.Current as Indexable;
	}

	// Advances to the next item; returns false once the sequence is
	// exhausted.  done_count only moves when an item is actually
	// available, so it never exceeds the number of items.
	public bool HasNextIndexable ()
	{
		if (! to_add_enumerator.MoveNext ())
			return false;

		++done_count;
		return true;
	}

	// "IndexingService: <done>" when the total is unknown,
	// "IndexingService: <done> of <total>" otherwise.
	public string StatusName {
		get {
			if (count == -1)
				return String.Format ("IndexingService: {0}", done_count);

			return String.Format ("IndexingService: {0} of {1}", done_count, count);
		}
	}

	// Nothing to do after a flush.
	public void PostFlushHook ()
	{
	}
}
// Message handler for IndexingServiceRequest: schedules one remove task per
// URI in ToRemove, then, if ToAdd is non-empty, a single Immediate-priority
// add task covering all of ToAdd.  Always answers with an EmptyResponse.
private ResponseMessage HandleMessage (RequestMessage msg)
{
	IndexingServiceRequest request = (IndexingServiceRequest) msg;

	// Removals first, one task each.
	foreach (Uri stale_uri in request.ToRemove)
		ThisScheduler.Add (NewRemoveTask (stale_uri));

	// FIXME: There should be a way for the request to control the
	// scheduler priority of the task.

	bool has_additions = request.ToAdd.Count > 0;

	if (has_additions) {
		IIndexableGenerator generator = new IndexableGenerator (request.ToAdd);
		Scheduler.Task add_task = NewAddTask (generator);

		add_task.Priority = Scheduler.Priority.Immediate;
		ThisScheduler.Add (add_task);
	}

	// FIXME: There should be an asynchronous response (fired by a Scheduler.Hook)
	// that fires when all of the items have been added to the index.

	return new EmptyResponse ();
}