2006-12-14 Jovan Naumovski <jovanna@cvs.gnome.org> *mk.po: Updated Macedonian translation
[beagle.git] / beagled / KonqHistoryQueryable / KonqQueryable.cs
blob2d2e1609902c2b8cbcefde1e87474f3d2dd949b9
1 //
2 // KonqQueryable.cs
3 //
4 // Copyright (C) 2005 Debajyoti Bera
5 //
6 //
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
26 using System;
27 using System.IO;
28 using System.Collections;
29 using System.Threading;
30 using System.Text;
32 using Beagle.Daemon;
33 using Beagle.Util;
35 namespace Beagle.Daemon.KonqQueryable {
37 [QueryableFlavor (Name="KonquerorHistory", Domain=QueryDomain.Local, RequireInotify=false)]
38 public class KonqQueryable : LuceneFileQueryable, IIndexableGenerator {
// Path of Konqueror's HTTP cache directory; assigned in the constructor.
private string konq_cache_dir;

// Enumerator over the subdirectories of konq_cache_dir; set by Crawl ().
private IEnumerator directory_enumerator;

// Delay, in seconds, that CrawlHook uses when rescheduling itself (5 min).
private int polling_interval_in_seconds = 300;

// Cache files are read with this encoding; ISO-Latin1 is code page 28591.
private Encoding latin_encoding = Encoding.GetEncoding (28591);
// Works out where the kio-http cache of the current user lives and
// remembers it in konq_cache_dir.
public KonqQueryable () : base ("KonqHistoryIndex")
{
	/* How to determine kio-http cache location ?
	 * From KDE web-page it looks like /var/tmp/kdecache-$USERNAME/http
	 */

	// The $KDEVARTMP env variable wins when it is set and non-empty;
	// otherwise fall back to /var/tmp.
	string var_tmp = Environment.GetEnvironmentVariable ("KDEVARTMP");
	bool have_var_tmp = (var_tmp != null && var_tmp.Length > 0);
	if (!have_var_tmp)
		var_tmp = "/var/tmp";

	string user_cache_dir = Path.Combine (var_tmp, "kdecache-" + Environment.UserName);
	konq_cache_dir = Path.Combine (user_cache_dir, "http");

	Log.Debug ("KonqCacheDir: " + konq_cache_dir);
}
63 /////////////////////////////////////////////////
// Starts the base queryable, then hands StartWorker to
// ExceptionHandlingThread so the backend setup does not run inline.
public override void Start ()
{
	base.Start ();

	ThreadStart worker = new ThreadStart (StartWorker);
	ExceptionHandlingThread.Start (worker);
}
// Sets up change detection for the cache directory (inotify when it is
// available, a self-rescheduling polling task otherwise) and kicks off
// the first crawl. Does nothing when the cache directory is missing.
private void StartWorker ()
{
	if (!Directory.Exists (konq_cache_dir)) {
		// if the directory is not present, user is not running KDE
		// no need to periodically check
		//GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
		return;
	}

	if (!Inotify.Enabled) {
		// No inotify: schedule CrawlHook, which re-crawls periodically.
		Scheduler.TaskHook hook = new Scheduler.TaskHook (CrawlHook);
		Scheduler.Task polling_task = Scheduler.TaskFromHook (hook);
		polling_task.Tag = "Crawling konqueror webcache";
		polling_task.Source = this;
		ThisScheduler.Add (polling_task);
	} else {
		// watch konq_cache_dir for new directory creations
		Inotify.Subscribe (konq_cache_dir, OnInotifyEvent, Inotify.EventType.Create);
	}

	Log.Info ("Starting Konq history backend ...");
	Crawl ();
}
// Begins a fresh crawl: restarts the enumeration of the cache's
// subdirectories and queues an add-task that pulls indexables from this
// object (see HasNextIndexable / GetNextIndexable).
private void Crawl ()
{
	// Reset the directory walk before the task is created.
	directory_enumerator = DirectoryWalker.GetDirectoryInfos (konq_cache_dir).GetEnumerator ();

	Scheduler.Task task = NewAddTask (this);
	task.Tag = crawler_tag;

	ThisScheduler.Add (task);
}
// Tag given to the crawl task; CrawlHook looks it up in the scheduler.
private string crawler_tag = "Konqueror History Crawler";

// Scheduler hook used when polling: starts a crawl unless a task tagged
// crawler_tag is already in the scheduler, then reschedules itself
// polling_interval_in_seconds from now.
private void CrawlHook (Scheduler.Task task)
{
	bool crawl_already_queued = ThisScheduler.ContainsByTag (crawler_tag);
	if (!crawl_already_queued)
		Crawl ();

	task.Reschedule = true;
	task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
}
// Returns true while the cache directory is still missing. Once the
// directory exists, calls Start () and returns false.
// NOTE(review): the only reference visible here is the commented-out
// GLib timeout in StartWorker, so this looks unused at present.
private bool CheckForExistence ()
{
	if (Directory.Exists (konq_cache_dir)) {
		this.Start ();
		return false;
	}

	return true;
}
124 /////////////////////////////////////////////////
126 // Modified/Created event using Inotify
// Inotify callback shared by konq_cache_dir and its subdirectories.
//
//   path    - the watched directory the event was reported for
//   subitem - name of the entry inside path that the event refers to
//   type    - event flags; IsDirectory tells directories from files
//
// watch and srcpath are not used.
//
// A file event hands the file to IndexSingleFile. A directory event
// starts watching the new directory itself, so that files created in it
// later are seen.
// NOTE(review): files reported directly inside konq_cache_dir are not
// filtered out here; they go through IndexSingleFile like any other file.
private void OnInotifyEvent (Inotify.Watch watch,
			     string path,
			     string subitem,
			     string srcpath,
			     Inotify.EventType type)
{
	if (subitem == null || subitem == "")
		return;

	string full_path = Path.Combine (path, subitem);

	if ((type & Inotify.EventType.IsDirectory) == 0) {
		IndexSingleFile (full_path);
		return;
	}

	// Watch the directory that was just created, not konq_cache_dir
	// again. The mask is the one HasNextIndexable uses for the
	// subdirectories it finds while crawling, so directories that appear
	// at runtime are watched the same way as the crawled ones.
	Inotify.Subscribe (full_path, OnInotifyEvent,
			   Inotify.EventType.Create | Inotify.EventType.MovedTo);
}
// Queues an immediate add-task for a single cache file. Paths ending in
// ".new" and files that FileToIndexable rejects are silently skipped.
void IndexSingleFile (string path)
{
	bool is_new_file = path.EndsWith (".new");
	Indexable indexable = is_new_file ? null : FileToIndexable (path);
	if (indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (indexable);
	add_task.Priority = Scheduler.Priority.Immediate;
	add_task.Tag = path;
	add_task.SubPriority = 0;

	ThisScheduler.Add (add_task);
}
161 /////////////////////////////////////////////////
// Builds the Indexable for one Konqueror cache file.
//
// Returns null when the file should not be indexed:
//   - it cannot be opened (it vanished, or another I/O error occurred)
//   - KonqHistoryUtil.ShouldIndex rejects it, or reports no url
//   - the url or the creation date read from the file is malformed
//   - the url is an https:// one
//
// Files rejected because of their contents get their last-write-time
// attribute attached, so the expensive check is not repeated.
private Indexable FileToIndexable (string path) {
	//Logger.Log.Debug ("KonqQ: Trying to index " + path);

	FileStream stream;
	try {
		stream = new FileStream (path, FileMode.Open, FileAccess.Read, FileShare.Read);
	} catch (IOException) {
		// that was fast - lost the file (or its whole directory),
		// or it cannot be read right now
		return null;
	}

	using (StreamReader reader = new StreamReader (stream, latin_encoding)) {
		string url = null;
		string creation_date = null;
		string mimetype = null;
		string charset = null;
		bool is_ok = KonqHistoryUtil.ShouldIndex (reader,
							  out url,
							  out creation_date,
							  out mimetype,
							  out charset);

		if (!is_ok || url == null || url == String.Empty) {
			//Logger.Log.Debug ("KonqQ: Skipping non-html file " + path + " of type=" + mimetype);
			// finding out if a cache file should be indexed is expensive
			// so, soon after we run the test, write lastwritetime attribute
			FileAttributesStore.AttachLastWriteTime (path, DateTime.UtcNow);
			return null; // we wont index bad files and non-html files
		}

		// Validate what was read from the file before using it: a
		// corrupt cache entry must not throw out of the indexer.
		Uri uri;
		try {
			uri = new Uri (url, true);
		} catch (UriFormatException) {
			Logger.Log.Debug ("KonqQ: Skipping " + path + " with malformed url=" + url);
			FileAttributesStore.AttachLastWriteTime (path, DateTime.UtcNow);
			return null;
		}

		DateTime date;
		if (!ParseCreationDate (creation_date, out date)) {
			Logger.Log.Debug ("KonqQ: Skipping " + path + " with malformed creation date");
			FileAttributesStore.AttachLastWriteTime (path, DateTime.UtcNow);
			return null;
		}

		Logger.Log.Debug ("KonqQ: Indexing " + path + " with url=" + url);
		if (uri.Scheme == Uri.UriSchemeHttps) {
			Logger.Log.Error ("Indexing secure https:// URIs is not secure!");
			return null;
		}

		Indexable indexable = new Indexable (uri);
		indexable.HitType = "WebHistory";
		indexable.MimeType = KonqHistoryUtil.KonqCacheMimeType;
		// store www.beaglewiki.org as www beagle org, till inpath: query is implemented
		indexable.AddProperty (Property.NewUnstored ("fixme:urltoken", StringFu.UrlFuzzyDivide (url)));
		// hint for the filter about the charset
		indexable.AddProperty (Property.NewUnsearched (StringFu.UnindexedNamespace + "charset", charset));

		indexable.Timestamp = date;

		indexable.ContentUri = UriFu.PathToFileUri (path);
		return indexable;
	}
}

// Parses creation_date as a whole number of seconds and adds it to
// DateTimeUtil.UnixToDateTimeUtc (0). Returns false, leaving date at
// that base value, when the text is missing, not a number, or too large
// to represent.
private static bool ParseCreationDate (string creation_date, out DateTime date)
{
	date = DateTimeUtil.UnixToDateTimeUtc (0);
	if (creation_date == null)
		return false;

	try {
		date = date.AddSeconds (Int64.Parse (creation_date));
		return true;
	} catch (FormatException) {
		return false;
	} catch (OverflowException) {
		return false;
	} catch (ArgumentOutOfRangeException) {
		return false;
	}
}
217 // FIXME: Implement removefile - but does removing files from history really make sense?
219 // ---------------- IIndexableGenerator --------------------------
// File most recently selected by HasNextIndexable; null when no crawl is
// in progress or the crawl has finished.
private FileInfo current_file;

// Enumerator over the files of the directory currently being crawled.
private IEnumerator file_enumerator = null;

// Returns the Indexable for current_file, or null when there is no
// current file or FileToIndexable rejects it.
public Indexable GetNextIndexable ()
{
	return (current_file != null)
		? FileToIndexable (current_file.FullName)
		: null;
}
// Advances the crawl to the next cache file that is not up to date and
// stores it in current_file. Returns false, with current_file and
// file_enumerator reset to null, once every directory has been walked.
public bool HasNextIndexable ()
{
	while (MoveToNextCacheFile ()) {
		current_file = (FileInfo) file_enumerator.Current;
		//if (!IsUpToDate (current_file.FullName))
		//	Logger.Log.Debug (current_file.FullName + " is not upto date");
		if (!IsUpToDate (current_file.FullName))
			return true;
	}

	return false;
}

// Positions file_enumerator on the next file, opening the following
// directory whenever the current one is exhausted. Returns false when
// there are no directories left.
private bool MoveToNextCacheFile ()
{
	while (file_enumerator == null || !file_enumerator.MoveNext ()) {
		if (!directory_enumerator.MoveNext ()) {
			Logger.Log.Debug ("KonqQ: Crawling done");
			file_enumerator = null;
			current_file = null;
			return false;
		}

		DirectoryInfo dir = (DirectoryInfo) directory_enumerator.Current;
		//Logger.Log.Debug ("Trying dir:" + dir.Name);

		// start watching for new files and get the list of current files
		// kind of race here - might get duplicate files
		if (Inotify.Enabled) {
			Inotify.EventType mask = Inotify.EventType.Create | Inotify.EventType.MovedTo;
			Inotify.Subscribe (dir.FullName, OnInotifyEvent, mask);
		}

		file_enumerator = DirectoryWalker.GetFileInfos (dir).GetEnumerator ();
	}

	return true;
}
// Human-readable progress: names the file currently being indexed, or
// "Done" when there is no current file.
public string StatusName {
	get {
		string progress = "Done";
		if (current_file != null)
			progress = current_file.FullName;

		return String.Format ("KonquerorQueryable: Indexing {0}", progress);
	}
}
261 public void PostFlushHook ()