Fixed #374055:Only the first "tag" is detected in digikam.
[beagle.git] / beagled / KonqHistoryQueryable / KonqQueryable.cs
blob11a7ec728edf5ac52b79678cceaf30fca15fbc5c
1 //
2 // KonqQueryable.cs
3 //
4 // Copyright (C) 2005 Debajyoti Bera
5 //
6 //
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
26 using System;
27 using System.IO;
28 using System.Collections;
29 using System.Threading;
30 using System.Text;
32 using Beagle.Daemon;
33 using Beagle.Util;
35 namespace Beagle.Daemon.KonqQueryable {
37 [QueryableFlavor (Name="KonquerorHistory", Domain=QueryDomain.Local, RequireInotify=false)]
38 public class KonqQueryable : LuceneFileQueryable, IIndexableGenerator {
// Logger dedicated to this backend.
private static Logger log = Logger.Get ("KonqQueryable");

// Directory holding the kio-http cache; computed in the constructor.
private string konq_cache_dir;

// Walks the subdirectories of konq_cache_dir during a crawl; re-created by Crawl ().
private IEnumerator directory_enumerator = null;

// Delay, in seconds, before the polling hook is triggered again (5 min).
private int polling_interval_in_seconds = 300;

// Cache files are read as ISO-Latin1, which is code page 28591.
private Encoding latin_encoding = Encoding.GetEncoding (28591);
// Creates the backend on top of the "KonqHistoryIndex" index and works out
// where the kio-http cache directory is located.
public KonqQueryable () : base ("KonqHistoryIndex")
{
	// How to determine the kio-http cache location?
	// From the KDE web page it looks like /var/tmp/kdecache-$USERNAME/http.
	// Now we use the $KDEVARTMP environment variable and fall back to
	// /var/tmp when it is unset or empty.
	string var_tmp = Environment.GetEnvironmentVariable ("KDEVARTMP");
	if (var_tmp == null || var_tmp == "")
		var_tmp = "/var/tmp";

	string user_cache_dir = Path.Combine (var_tmp, "kdecache-" + Environment.UserName);
	konq_cache_dir = Path.Combine (user_cache_dir, "http");

	log.Debug ("KonqCacheDir: " + konq_cache_dir);
}
65 /////////////////////////////////////////////////
// Starts the base queryable, then runs StartWorker through
// ExceptionHandlingThread so that the setup work happens off the caller.
public override void Start ()
{
	base.Start ();

	ThreadStart worker = new ThreadStart (StartWorker);
	ExceptionHandlingThread.Start (worker);
}
// Sets up change detection for the cache directory (an inotify watch when
// inotify is enabled, a scheduler polling hook otherwise) and kicks off the
// initial crawl. Does nothing at all when the cache directory is missing.
private void StartWorker ()
{
	bool cache_dir_present = Directory.Exists (konq_cache_dir);
	if (!cache_dir_present) {
		// if the directory is not present, user is not running KDE
		// no need to periodically check
		//GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
		return;
	}

	if (!Inotify.Enabled) {
		// No inotify: register a hook that re-crawls periodically.
		Scheduler.TaskHook hook = new Scheduler.TaskHook (CrawlHook);
		Scheduler.Task polling_task = Scheduler.TaskFromHook (hook);
		polling_task.Tag = "Crawling konqueror webcache";
		polling_task.Source = this;
		ThisScheduler.Add (polling_task);
	} else {
		// watch konq_cache_dir for new directory creations
		Inotify.Subscribe (konq_cache_dir, OnInotifyEvent, Inotify.EventType.Create);
	}

	log.Info ("Starting Konq history backend ...");
	Crawl ();
}
// Begins a crawl of the cache: resets the directory enumerator to the
// subdirectories of konq_cache_dir and schedules this object as the source
// of an add task tagged with crawler_tag.
private void Crawl ()
{
	State = QueryableState.Crawling;

	directory_enumerator =
		DirectoryWalker.GetDirectoryInfos (konq_cache_dir).GetEnumerator ();

	Scheduler.Task generator_task = NewAddTask (this);
	generator_task.Tag = crawler_tag;
	ThisScheduler.Add (generator_task);

	// The state is reset as soon as the task has been queued.
	State = QueryableState.Idle;
}
// Tag given to the crawl task so that it can be looked up in the scheduler.
private string crawler_tag = "Konqueror History Crawler";

// Scheduler hook used for polling: starts a new crawl unless a task tagged
// crawler_tag is already in the scheduler, then asks to be run again after
// polling_interval_in_seconds.
private void CrawlHook (Scheduler.Task task)
{
	bool crawl_already_queued = ThisScheduler.ContainsByTag (crawler_tag);
	if (!crawl_already_queued)
		Crawl ();

	task.Reschedule = true;
	task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
}
// Returns true while konq_cache_dir does not exist. Once the directory has
// appeared, starts the backend and returns false.
// NOTE(review): the only visible user is a commented-out GLib timeout, where
// the return value presumably means "keep calling" - confirm before reuse.
private bool CheckForExistence ()
{
	if (Directory.Exists (konq_cache_dir)) {
		this.Start ();
		return false;
	}

	return true;
}
128 /////////////////////////////////////////////////
130 // Modified/Created event using Inotify
// Inotify callback shared by the watch on konq_cache_dir and the watches on
// its subdirectories.
//   path    - the watched directory the event was reported for
//   subitem - name of the entry inside path; events without one are ignored
//   type    - event flags; IsDirectory tells directories and files apart
// A file event hands the file to IndexSingleFile. A directory event puts a
// watch on that directory so that files written into it are noticed.
private void OnInotifyEvent (Inotify.Watch watch,
			     string path,
			     string subitem,
			     string srcpath,
			     Inotify.EventType type)
{
	if (subitem == null || subitem == "")
		return;

	string full_path = Path.Combine (path, subitem);

	// Watch konq_cache_dir for new directory creation
	// Watch its subdirectories for new file creation
	bool is_directory = (type & Inotify.EventType.IsDirectory) != 0;
	if (!is_directory) {
		IndexSingleFile (full_path);
		return;
	}

	// Subscribe to the directory that was just created. Subscribing to
	// konq_cache_dir again, as was done before, left the new directory
	// unwatched, so files created in it were never indexed.
	// NOTE(review): directories found while crawling are watched with
	// Create | MovedTo instead of CloseWrite - confirm which mask is intended.
	Inotify.Subscribe (full_path, OnInotifyEvent, Inotify.EventType.CloseWrite);
}
// Schedules a single cache file for immediate indexing. Paths ending in
// ".new" and files that FileToIndexable turns down are silently skipped.
void IndexSingleFile (string path)
{
	Indexable indexable = path.EndsWith (".new") ? null : FileToIndexable (path);
	if (indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (indexable);
	add_task.Tag = path;
	add_task.Priority = Scheduler.Priority.Immediate;
	add_task.SubPriority = 0;
	ThisScheduler.Add (add_task);
}
165 /////////////////////////////////////////////////
// Builds the "WebHistory" indexable for one cache file.
//   path - full path of the cache file
// Returns null when the file has disappeared, when KonqHistoryUtil.ShouldIndex
// rejects it, when no url was found, or when the url is an https one.
// The file is only opened to read its header; it is closed again before this
// method returns, and the indexable refers to the content through ContentUri.
private Indexable FileToIndexable (string path)
{
	//Logger.Log.Debug ("KonqQ: Trying to index " + path);

	FileStream stream;
	try {
		stream = new FileStream (path, FileMode.Open, FileAccess.Read, FileShare.Read);
	} catch (FileNotFoundException) {
		// that was fast - lost the file
		return null;
	}

	string url = null;
	string creation_date = null;
	string mimetype = null;
	string charset = null;
	bool is_ok = false;

	// Disposing the reader also closes the underlying stream, so the file
	// handle is released on every path, including when ShouldIndex throws.
	using (StreamReader reader = new StreamReader (stream, latin_encoding)) {
		is_ok = KonqHistoryUtil.ShouldIndex (reader,
						     out url,
						     out creation_date,
						     out mimetype,
						     out charset);
	}

	if (!is_ok || url == null || url == String.Empty) {
		//Logger.Log.Debug ("KonqQ: Skipping non-html file " + path + " of type=" + mimetype);
		// finding out if a cache file should be indexed is expensive
		// so, soon after we run the test, write lastwritetime attribute
		FileAttributesStore.AttachLastWriteTime (path, DateTime.UtcNow);
		return null; // we wont index bad files and non-html files
	}

	Logger.Log.Debug ("KonqQ: Indexing " + path + " with url=" + url);
	Uri uri = new Uri (url, true);
	if (uri.Scheme == Uri.UriSchemeHttps) {
		Logger.Log.Error ("Indexing secure https:// URIs is not secure!");
		return null;
	}

	Indexable indexable = new Indexable (uri);
	indexable.HitType = "WebHistory";
	indexable.MimeType = KonqHistoryUtil.KonqCacheMimeType;
	// store www.beaglewiki.org as www beagle org, till inpath: query is implemented
	indexable.AddProperty (Property.NewUnstored ("fixme:urltoken", StringFu.UrlFuzzyDivide (url)));
	// hint for the filter about the charset
	indexable.AddProperty (Property.NewUnsearched (StringFu.UnindexedNamespace + "charset", charset));

	// creation_date is parsed as a number of seconds added to 1970-01-01.
	DateTime date = new DateTime (1970, 1, 1);
	date = date.AddSeconds (Int64.Parse (creation_date));
	indexable.Timestamp = date;

	indexable.ContentUri = UriFu.PathToFileUri (path);
	return indexable;
}
220 // FIXME: Implement removefile - but does removing files from history really make sense?
222 // ---------------- IIndexableGenerator --------------------------
// File selected by the last successful HasNextIndexable () call; null when
// the crawl has finished.
private FileInfo current_file;

// Enumerates the files of the directory that is currently being crawled.
private IEnumerator file_enumerator = null;

// Returns the indexable for current_file, or null when there is no current
// file or FileToIndexable turns the file down.
public Indexable GetNextIndexable ()
{
	return (current_file != null) ? FileToIndexable (current_file.FullName) : null;
}
// Advances to the next cache file for which IsUpToDate returns false and
// stores it in current_file. Returns true when such a file was found, false
// once every directory has been exhausted. Each directory is also put under
// an inotify watch, when inotify is enabled, as it is entered.
public bool HasNextIndexable ()
{
	for (;;) {
		// Still files left in the directory being walked?
		if (file_enumerator != null && file_enumerator.MoveNext ()) {
			current_file = (FileInfo) file_enumerator.Current;
			//if (!IsUpToDate (current_file.FullName))
			//	Logger.Log.Debug (current_file.FullName + " is not upto date");
			if (!IsUpToDate (current_file.FullName))
				return true;
			continue;
		}

		// Directory exhausted (or none opened yet): move to the next one.
		if (!directory_enumerator.MoveNext ()) {
			Logger.Log.Debug ("KonqQ: Crawling done");
			file_enumerator = null;
			current_file = null;
			return false;
		}

		DirectoryInfo dir = (DirectoryInfo) directory_enumerator.Current;
		//Logger.Log.Debug ("Trying dir:" + dir.Name);
		// start watching for new files and get the list of current files
		// kind of race here - might get duplicate files
		if (Inotify.Enabled) {
			Inotify.EventType mask = Inotify.EventType.Create | Inotify.EventType.MovedTo;
			Inotify.Subscribe (dir.FullName, OnInotifyEvent, mask);
		}
		file_enumerator = DirectoryWalker.GetFileInfos (dir).GetEnumerator ();
	}
}
// Human-readable status: the full name of the file currently being indexed,
// or "Done" when there is no current file.
public string StatusName {
	get {
		string target = "Done";
		if (current_file != null)
			target = current_file.FullName;

		return String.Format ("KonquerorQueryable: Indexing {0}", target);
	}
}
264 public void PostFlushHook ()