Fixed #374055: Only the first "tag" is detected in digikam.
[beagle.git] / beagled / IndexingServiceQueryable / IndexingServiceQueryable.cs
blobf08f2a8c2a681f757ca2c75abeb229b9dc4027df
1 //
2 // IndexingServiceQueryable.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 // The IndexingService has two modes of operation: one is through the standard
28 // message-passing system and one where a slightly-structured file is dropped
29 // into a known location on the filesystem.
31 // (1) Messaging: An IndexingServiceRequest message is sent containing URIs of
32 // items to remove and Indexables to add. This is more reliable, and is best
33 // for clients which will also be utilizing Beagle for searching.
35 // (2) Files: The file to be indexed is dropped into the ~/.beagle/ToIndex
36 // directory. Another file with the same name prepended with a period is
37 // also dropped into the directory. In that file is the metadata for the
38 // file being indexed. The first line is the URI of the data being indexed.
39 // The second line is the hit type. The third line is the mime type. Then
40 // there are zero or more properties in the form "type:key=value", where
41 // "type" is either 't' for text or 'k' for keyword. This method is a lot
42 // easier to use, but requires that Beagle have inotify support enabled to
43 // work.
45 using System;
46 using System.Collections;
47 using System.IO;
48 using System.Threading;
50 using Beagle.Daemon;
51 using Beagle.Util;
53 namespace Beagle.Daemon.IndexingServiceQueryable {
55 [QueryableFlavor (Name="IndexingService", Domain=QueryDomain.Local, RequireInotify=false)]
56 public class IndexingServiceQueryable : LuceneQueryable {
// Passes "IndexingServiceIndex" to the LuceneQueryable base constructor and
// registers HandleMessage as the handler for IndexingServiceRequest messages.
public IndexingServiceQueryable () : base ("IndexingServiceIndex")
{
	Server.RequestMessageHandler handler = new Server.RequestMessageHandler (HandleMessage);
	Server.RegisterRequestMessageHandler (typeof (IndexingServiceRequest), handler);
}
// Runs the base class start-up, then hands StartWorker to
// ExceptionHandlingThread so the directory setup and initial crawl
// are launched from there.
public override void Start ()
{
	base.Start ();

	ThreadStart worker = new ThreadStart (StartWorker);
	ExceptionHandlingThread.Start (worker);
}
// Prepares the "ToIndex" drop directory under PathFinder.StorageDir,
// subscribes to close-write inotify events on it when inotify is enabled,
// and schedules an initial crawl of whatever is already in the directory.
private void StartWorker ()
{
	string to_index_dir = Path.Combine (PathFinder.StorageDir, "ToIndex");

	bool dir_missing = ! Directory.Exists (to_index_dir);
	if (dir_missing)
		Directory.CreateDirectory (to_index_dir);

	if (Inotify.Enabled) {
		Inotify.Subscribe (to_index_dir, OnInotifyEvent, Inotify.EventType.CloseWrite);
	}

	Logger.Log.Info ("Setting up an initial crawl of the IndexingService directory");

	State = QueryableState.Crawling;

	// The generator wraps the lazy enumeration from GetIndexables ().
	IndexableGenerator crawl_generator = new IndexableGenerator (GetIndexables (to_index_dir));
	Scheduler.Task crawl_task = NewAddTask (crawl_generator);
	crawl_task.Tag = "IndexingService initial crawl";
	ThisScheduler.Add (crawl_task);

	State = QueryableState.Idle;
}
// Lazily enumerates the Indexables for every data file under "path" that
// has a matching meta file (same name with a leading '.').
//
// Data files without a meta file are skipped, as are files whose meta
// file could not be turned into an Indexable: FileToIndexable () returns
// null in that case, and a null must never be handed to the consumer.
private IEnumerable GetIndexables (string path)
{
	foreach (FileInfo file in DirectoryWalker.GetFileInfos (path)) {
		// Dot-files are the meta companions, not data files.
		if (file.Name [0] == '.')
			continue;

		string meta_path = Path.Combine (file.DirectoryName, "." + file.Name);
		if (! File.Exists (meta_path))
			continue;

		// The meta file may vanish or be malformed; skip it then.
		Indexable indexable = FileToIndexable (file);
		if (indexable != null)
			yield return indexable;
	}
}
// Builds an Indexable for "data_file" from its meta file, which lives in
// the same directory under the same name prefixed with '.'.
//
// Meta file layout:
//   line 1: URI of the item being indexed
//   line 2: hit type (may be empty)
//   line 3: mime type (may be empty)
//   rest:   zero or more properties as "t:key=value" (text) or
//           "k:key=value" (keyword); empty lines are ignored
//
// Returns null if the meta file has disappeared, the URI cannot be parsed,
// or the file ends before the hit type or mime type line.  Malformed
// property lines are logged and skipped.  The meta file itself is not
// deleted here; it is stored in LocalState ["MetaFile"] for PostAddHook ().
private Indexable FileToIndexable (FileInfo data_file)
{
	FileInfo meta_file = new FileInfo (Path.Combine (data_file.DirectoryName, "." + data_file.Name));
	FileStream meta_stream;

	try {
		meta_stream = meta_file.Open (FileMode.Open, FileAccess.Read, FileShare.Read);
	} catch (FileNotFoundException) {
		// The meta file disappeared before we could
		// open it.
		return null;
	}

	// Disposing the reader also closes meta_stream, so the file handle
	// is released on every exit path, including unexpected exceptions.
	using (StreamReader reader = new StreamReader (meta_stream)) {
		// First line of the file is a URI
		string line = reader.ReadLine ();
		Uri uri;

		try {
			uri = new Uri (line);
		} catch (Exception e) {
			Logger.Log.Warn (e, "IndexingService: Unable to parse URI in {0}:", meta_file.FullName);
			return null;
		}

		Indexable indexable = new Indexable (uri);
		indexable.Timestamp = data_file.LastWriteTimeUtc;
		indexable.ContentUri = UriFu.PathToFileUri (data_file.FullName);
		indexable.DeleteContent = true;

		// Second line is the hit type
		line = reader.ReadLine ();
		if (line == null) {
			Logger.Log.Warn ("IndexingService: EOF reached trying to read hit type from {0}",
					 meta_file.FullName);
			return null;
		} else if (line != String.Empty)
			indexable.HitType = line;

		// Third line is the mime type
		line = reader.ReadLine ();
		if (line == null) {
			Logger.Log.Warn ("IndexingService: EOF reached trying to read mime type from {0}",
					 meta_file.FullName);
			return null;
		} else if (line != String.Empty)
			indexable.MimeType = line;

		// Following lines are properties in "t:key=value" format
		while ((line = reader.ReadLine ()) != null) {
			if (line == String.Empty)
				continue;

			bool keyword = false;

			if (line [0] == 'k')
				keyword = true;
			else if (line [0] != 't') {
				Logger.Log.Warn ("IndexingService: Unknown property type: '{0}'", line [0]);
				continue;
			}

			int i = line.IndexOf ('=');

			// The key starts at index 2 (after the type character and
			// its separator), so an '=' before that position cannot
			// delimit a key; Substring (2, i - 2) would throw.
			if (i < 2) {
				Logger.Log.Warn ("IndexingService: Unknown property line: '{0}'", line);
				continue;
			}

			string key = line.Substring (2, i - 2);
			string value = line.Substring (i + 1);

			// FIXME: We should probably handle date types
			if (keyword)
				indexable.AddProperty (Property.NewUnsearched (key, value));
			else
				indexable.AddProperty (Property.New (key, value));
		}

		// Ok, we're finished with the meta file.  It will be
		// deleted in PostAddHook ().
		indexable.LocalState ["MetaFile"] = meta_file;

		return indexable;
	}
}
// Paths of data files that have been handed to IndexFile () and not yet
// removed again by PostAddHook ().  The order in which the data file and
// its meta file are created isn't defined, so an event can arrive for
// either one; this list keeps the pair from being indexed twice.
private ArrayList pending_files = new ArrayList ();

// Inotify callback for the watched directory.  "subitem" is the name of
// the file the event is about: either a data file or a '.'-prefixed meta
// file.  Indexing is triggered once the other half of the pair exists and
// the data file is not already pending.
private void OnInotifyEvent (Inotify.Watch watch,
			     string path,
			     string subitem,
			     string srcpath,
			     Inotify.EventType type)
{
	if (subitem == "")
		return;

	bool is_meta = (subitem [0] == '.');

	string data_name = is_meta ? subitem.Substring (1) : subitem;
	string data_path = Path.Combine (path, data_name);

	// The counterpart that must be present before we can index:
	// the data file for a meta event, the meta file for a data event.
	string counterpart_path = is_meta ? data_path : Path.Combine (path, "." + subitem);

	lock (pending_files) {
		if (! File.Exists (counterpart_path))
			return;

		if (pending_files.Contains (data_path))
			return;

		pending_files.Add (data_path);
		IndexFile (new FileInfo (data_path));
	}
}
// Converts "data_file" into an Indexable and schedules it to be added
// with Immediate priority.
//
// FileToIndexable () returns null when the meta file is missing or
// malformed.  In that case nothing is scheduled, and the file is dropped
// from pending_files: PostAddHook () will never run for it, so leaving
// the entry in place would block every later attempt to index this file.
private void IndexFile (FileInfo data_file)
{
	Indexable indexable = FileToIndexable (data_file);

	if (indexable == null) {
		// lock is re-entrant, so this is safe even though the caller
		// already holds the pending_files lock.
		// NOTE(review): assumes data_file.FullName is the same string
		// the caller stored in pending_files -- confirm for
		// non-normalized watch paths.
		lock (pending_files)
			pending_files.Remove (data_file.FullName);
		return;
	}

	Scheduler.Task task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (task);
}
// Called after "indexable" has been added.  For indexables built by
// FileToIndexable () this deletes the meta file recorded in
// LocalState ["MetaFile"] and clears the data file from pending_files.
//
// Indexables that arrive through HandleMessage () never get a "MetaFile"
// entry, so there is nothing to clean up for them and we return early
// instead of dereferencing null.
// NOTE(review): assumes LocalState returns null for a missing key
// (Hashtable-style indexer) -- confirm.
protected override void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	FileInfo meta_file = indexable.LocalState ["MetaFile"] as FileInfo;
	if (meta_file == null)
		return;

	meta_file.Delete ();

	lock (pending_files)
		pending_files.Remove (indexable.ContentUri.LocalPath);
}
// Adapts a plain enumeration of Indexables to IIndexableGenerator.
//
// HasNextIndexable () advances the underlying enumerator, and
// GetNextIndexable () returns the element it is positioned on, so each
// GetNextIndexable () call must be preceded by a HasNextIndexable () call
// that returned true.
private class IndexableGenerator : IIndexableGenerator {
	private IEnumerator to_add_enumerator;

	// count is the total number of items, or -1 when it is not known
	// (plain IEnumerable source).  done_count is the number of items
	// the enumerator has been advanced onto so far.
	private int count = -1, done_count = 0;

	public IndexableGenerator (IEnumerable to_add)
	{
		this.to_add_enumerator = to_add.GetEnumerator ();
	}

	// A collection knows its size up front, so StatusName can report
	// progress as "N of M".
	public IndexableGenerator (ICollection to_add) : this ((IEnumerable) to_add)
	{
		this.count = to_add.Count;
	}

	// Returns the current element, or null if it is not an Indexable.
	public Indexable GetNextIndexable ()
	{
		return to_add_enumerator.Current as Indexable;
	}

	// Advances to the next element.  done_count is only bumped when
	// there actually is one, so the final call (which returns false)
	// no longer pushes the reported progress past the real item count.
	public bool HasNextIndexable ()
	{
		if (! to_add_enumerator.MoveNext ())
			return false;

		++done_count;
		return true;
	}

	public string StatusName {
		get {
			if (count == -1)
				return String.Format ("IndexingService: {0}", done_count);
			else
				return String.Format ("IndexingService: {0} of {1}", done_count, count);
		}
	}

	// Nothing to do after a flush.
	public void PostFlushHook ()
	{
	}
}
// Message handler for IndexingServiceRequest: schedules a remove task for
// every URI in ToRemove and, if ToAdd is non-empty, a single add task
// (Immediate priority) covering all of ToAdd.  Always answers with an
// EmptyResponse.
private ResponseMessage HandleMessage (RequestMessage msg)
{
	IndexingServiceRequest request = (IndexingServiceRequest) msg;

	foreach (Uri doomed_uri in request.ToRemove) {
		Scheduler.Task remove_task = NewRemoveTask (doomed_uri);
		ThisScheduler.Add (remove_task);
	}

	// FIXME: There should be an asynchronous response (fired by a Scheduler.Hook)
	// that fires when all of the items have been added to the index.

	// Nothing to add: no response beyond the empty one
	if (request.ToAdd.Count <= 0)
		return new EmptyResponse ();

	// FIXME: There should be a way for the request to control the
	// scheduler priority of the task.
	IIndexableGenerator add_generator = new IndexableGenerator (request.ToAdd);
	Scheduler.Task add_task = NewAddTask (add_generator);
	add_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (add_task);

	// No response
	return new EmptyResponse ();
}