4 // Copyright (C) 2005 Debajyoti Bera
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Threading
;
35 namespace Beagle
.Daemon
.KonqQueryable
{
37 [QueryableFlavor (Name
="KonquerorHistory", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
38 public class KonqQueryable
: LuceneFileQueryable
, IIndexableGenerator
{
// Logger for this backend, obtained under the name "KonqQueryable".
37 private static Logger log
= Logger
.Get ("KonqQueryable");
// Path of the Konqueror http cache directory; assigned in the constructor.
42 string konq_cache_dir
;
// Enumerator over the directory entries DirectoryWalker reports for
// konq_cache_dir; created when a crawl is set up and advanced by HasNextIndexable.
43 private IEnumerator directory_enumerator
= null;
// Number of seconds CrawlHook adds to the current time when it sets the
// next trigger time of its task.
44 private int polling_interval_in_seconds
= 300; // 5 min
46 // ISO-Latin1 is 28591
// Encoding handed to the StreamReader that FileToIndexable opens on a cache file.
47 private Encoding latin_encoding
= Encoding
.GetEncoding (28591);
// Constructor: passes the index name "KonqHistoryIndex" to the base class and
// computes konq_cache_dir as <tmpdir>/kdecache-<user name>/http, where tmpdir
// is read from the KDEVARTMP environment variable. The resulting path is
// written to the debug log.
49 public KonqQueryable () : base ("KonqHistoryIndex")
51 /* How to determine kio-http cache location ?
52 * From KDE web-page it looks like /var/tmp/kdecache-$USERNAME/http
54 //Now we use the $KDEVARTMP env variable
55 string tmpdir
= Environment
.GetEnvironmentVariable ("KDEVARTMP");
// NOTE(review): the statement guarded by this check is not visible in this
// extract (embedded numbering skips 58-59). Presumably it assigns a fallback
// directory when KDEVARTMP is unset or empty - confirm against the full file.
57 if (tmpdir
== null || tmpdir
== "")
// Per-user cache root: "kdecache-" followed by the current user name.
60 konq_cache_dir
= Path
.Combine (tmpdir
, "kdecache-" + Environment
.UserName
);
// The http cache is the "http" subdirectory of that root.
61 konq_cache_dir
= Path
.Combine (konq_cache_dir
, "http");
62 log
.Debug ("KonqCacheDir: " + konq_cache_dir
);
65 /////////////////////////////////////////////////
// Overrides the base Start: wraps StartWorker in a ThreadStart delegate and
// hands it to ExceptionHandlingThread.Start (presumably so the setup runs on
// its own thread - confirm against ExceptionHandlingThread).
// NOTE(review): the embedded numbering skips 68-69 before the call below, so
// anything executed first is not visible in this extract.
67 public override void Start ()
70 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker started from Start (). Checks that the cache directory exists, then
// either subscribes to inotify events on it or registers a scheduler hook
// (CrawlHook), logs the start-up message and sets up the crawl.
73 private void StartWorker ()
// NOTE(review): the body of this check is cut off in this extract (numbering
// skips 79-81). Going by the comments below, the method presumably returns
// here when the directory is absent - confirm.
75 if (!Directory
.Exists (konq_cache_dir
)) {
76 // if the directory is not present, user is not running KDE
77 // no need to periodically check
78 //GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
82 if (Inotify
.Enabled
) {
83 // watch konq_cache_dir for new directory creations
84 Inotify
.EventType mask
= Inotify
.EventType
.Create
;
85 Inotify
.Subscribe (konq_cache_dir
, OnInotifyEvent
, mask
);
// Build a scheduler task from CrawlHook, tag it, mark this backend as its
// source and add it to the scheduler.
// NOTE(review): the line that introduces this section (numbering skips 86) is
// not visible; presumably this is the branch taken when inotify is not
// enabled - confirm.
87 Scheduler
.Task crawl_task
= Scheduler
.TaskFromHook (new Scheduler
.TaskHook (CrawlHook
));
88 crawl_task
.Tag
= "Crawling konqueror webcache";
89 crawl_task
.Source
= this;
90 ThisScheduler
.Add (crawl_task
);
93 log
.Info ("Starting Konq history backend ...");
// NOTE(review): the embedded numbering skips 94-98 here and crawl_task is
// declared a second time below, so a method boundary is probably missing from
// this extract - confirm against the full file.
// Crawl set-up: mark the backend as crawling, obtain an enumerator over the
// directory entries of konq_cache_dir, and queue an add task built from this
// object (the class implements IIndexableGenerator) under crawler_tag.
99 State
= QueryableState
.Crawling
;
100 directory_enumerator
= DirectoryWalker
.GetDirectoryInfos (konq_cache_dir
).GetEnumerator ();
101 Scheduler
.Task crawl_task
= NewAddTask (this);
102 crawl_task
.Tag
= crawler_tag
;
103 ThisScheduler
.Add (crawl_task
);
// State is set back to Idle as soon as the task has been added.
104 State
= QueryableState
.Idle
;
// Tag assigned to the crawl add-task; CrawlHook asks the scheduler whether a
// task with this tag is present.
107 private string crawler_tag
= "Konqueror History Crawler";
// Hook wrapped in a Scheduler.TaskHook by StartWorker. Checks whether a task
// tagged crawler_tag is in the scheduler, then sets Reschedule on its own task
// and moves the trigger time polling_interval_in_seconds into the future.
108 private void CrawlHook (Scheduler
.Task task
)
// NOTE(review): the body of this check is not visible (numbering skips
// 111-113); presumably a new crawl is started when none is queued - confirm.
110 if (!ThisScheduler
.ContainsByTag (crawler_tag
)) {
114 task
.Reschedule
= true;
115 task
.TriggerTime
= DateTime
.Now
.AddSeconds (polling_interval_in_seconds
);
// Tests whether konq_cache_dir exists. The only visible reference to this
// method is the commented-out GLib timeout in StartWorker.
// NOTE(review): everything after the condition (numbering skips 121-127) is
// missing from this extract, so the returned values cannot be documented here.
118 private bool CheckForExistence ()
120 if (!Directory
.Exists (konq_cache_dir
))
128 /////////////////////////////////////////////////
130 // Modified/Created event using Inotify
// Inotify callback registered through Inotify.Subscribe. Events that do not
// carry the IsDirectory flag are treated as files and indexed immediately;
// events that do carry it lead to another subscription for CloseWrite events.
// NOTE(review): part of the parameter list (numbering skips 133-135) and the
// start of the body (137-140) are missing from this extract; `path` and
// `subitem` used below are presumably among the missing parameters.
132 private void OnInotifyEvent (Inotify
.Watch watch
,
136 Inotify
.EventType type
)
141 // Watch konq_cache_dir for new directory creation
142 // Watch its subdirectories for new file creation
143 // If any file is created in konq_cache_dir, ignore it
144 // It's a Konq error otherwise
145 if ((type
& Inotify
.EventType
.IsDirectory
) == 0)
146 IndexSingleFile (Path
.Combine (path
, subitem
));
// The condition below is the exact negation of the one above.
// NOTE(review): this subscribes konq_cache_dir again rather than the newly
// created subdirectory (Path.Combine (path, subitem)), which does not match
// the "watch its subdirectories" comment above - looks like a bug; confirm.
147 else if ((type
& Inotify
.EventType
.IsDirectory
) != 0)
148 Inotify
.Subscribe (konq_cache_dir
, OnInotifyEvent
, Inotify
.EventType
.CloseWrite
);
// Converts one cache file into an Indexable and queues it with the scheduler
// at Immediate priority. Called from OnInotifyEvent for non-directory events.
151 void IndexSingleFile (string path
)
// NOTE(review): the statement guarded by this check is not visible (numbering
// skips 154); presumably files ending in ".new" are skipped - confirm.
153 if (path
.EndsWith (".new"))
155 Indexable indexable
= FileToIndexable (path
);
// FileToIndexable returns null for files that should not be indexed.
// NOTE(review): the guarded statement (numbering skips 157) is not visible;
// presumably the method returns here - confirm.
156 if (indexable
== null)
158 Scheduler
.Task task
= NewAddTask (indexable
);
159 task
.Priority
= Scheduler
.Priority
.Immediate
;
161 task
.SubPriority
= 0;
162 ThisScheduler
.Add (task
);
165 /////////////////////////////////////////////////
// Builds an Indexable for the cache file at `path`, or returns null when the
// file should not be indexed. The file is opened read-only, its header is
// examined by KonqHistoryUtil.ShouldIndex, and the resulting Indexable gets
// hit type "WebHistory", a url-token property, a charset hint, a timestamp
// and a content URI pointing at the file.
// NOTE(review): several lines are missing from this extract (the try opener,
// the declarations of `stream` and `url`, most ShouldIndex arguments and the
// return statements); comments below describe only what is visible.
167 private Indexable
FileToIndexable (string path
) {
168 //Logger.Log.Debug ("KonqQ: Trying to index " + path);
172 stream
= new FileStream (path
, FileMode
.Open
, FileAccess
.Read
, FileShare
.Read
);
173 } catch (FileNotFoundException
) {
174 // that was fast - lost the file
// Read the cache file using the ISO-Latin1 encoding held in latin_encoding.
178 StreamReader reader
= new StreamReader (stream
, latin_encoding
);
// NOTE(review): these three are presumably filled in by ShouldIndex through
// arguments that are not visible here (numbering skips 184-188) - confirm.
180 string creation_date
= null;
181 string mimetype
= null;
182 string charset
= null;
183 bool is_ok
= KonqHistoryUtil
.ShouldIndex (reader
,
189 if (!is_ok
|| url
== String
.Empty
) {
190 //Logger.Log.Debug ("KonqQ: Skipping non-html file " + path + " of type=" + mimetype);
191 // finding out if a cache file should be indexed is expensive
192 // so, soon after we run the test, write lastwritetime attribute
193 FileAttributesStore
.AttachLastWriteTime (path
, DateTime
.UtcNow
);
194 return null; // we won't index bad files and non-html files
197 Logger
.Log
.Debug ("KonqQ: Indexing " + path
+ " with url=" + url
);
198 Uri uri
= new Uri (url
, true);
// https URLs are reported as an error.
// NOTE(review): the rest of this branch (numbering skips 201-203) is not
// visible; presumably such entries are not indexed - confirm.
199 if (uri
.Scheme
== Uri
.UriSchemeHttps
) {
200 Logger
.Log
.Error ("Indexing secure https:// URIs is not secure!");
204 Indexable indexable
= new Indexable (uri
);
205 indexable
.HitType
= "WebHistory";
206 indexable
.MimeType
= KonqHistoryUtil
.KonqCacheMimeType
;
207 // store www.beaglewiki.org as www beagle org, till inpath: query is implemented
208 indexable
.AddProperty (Property
.NewUnstored ("fixme:urltoken", StringFu
.UrlFuzzyDivide (url
)));
209 // hint for the filter about the charset
210 indexable
.AddProperty (Property
.NewUnsearched (StringFu
.UnindexedNamespace
+ "charset", charset
));
// creation_date is parsed as a count of seconds and added to 1970-01-01.
212 DateTime date
= new DateTime (1970, 1, 1);
213 date
= date
.AddSeconds (Int64
.Parse (creation_date
));
214 indexable
.Timestamp
= date
;
// The content itself is read from the cache file on disk.
216 indexable
.ContentUri
= UriFu
.PathToFileUri (path
);
220 // FIXME: Implement removefile - but does removing files from history really make sense?
222 // ---------------- IIndexableGenerator --------------------------
// File selected by the last HasNextIndexable call; GetNextIndexable converts
// it and StatusName reports it.
223 private FileInfo current_file
;
// Enumerator over the files of the directory currently being crawled;
// created and advanced by HasNextIndexable.
224 private IEnumerator file_enumerator
= null;
// Returns the Indexable built from current_file, using FileToIndexable on its
// full path.
226 public Indexable
GetNextIndexable ()
// NOTE(review): the statement guarded by this check is not visible (numbering
// skips 229); presumably null is returned when no file is selected - confirm.
228 if (current_file
== null)
230 return FileToIndexable (current_file
.FullName
);
// Advances the crawl to the next cache file. Walks the files of the current
// directory, moving on to the next directory from directory_enumerator when
// the file list is exhausted, and keeps going while IsUpToDate reports the
// selected file as up to date. Each directory visited is also subscribed for
// Create and MovedTo inotify events.
// NOTE(review): the opening of the outer do-loop and the return statements
// are missing from this extract (numbering skips 234-235, 240-242, 256-259).
233 public bool HasNextIndexable ()
// Inner loop: runs until file_enumerator exists and yields another file.
236 while (file_enumerator
== null || ! file_enumerator
.MoveNext ()) {
// No directories left: the crawl is finished.
237 if (! directory_enumerator
.MoveNext ()) {
238 Logger
.Log
.Debug ("KonqQ: Crawling done");
239 file_enumerator
= null;
243 DirectoryInfo current_dir
= (DirectoryInfo
)directory_enumerator
.Current
;
244 //Logger.Log.Debug ("Trying dir:" + current_dir.Name);
245 // start watching for new files and get the list of current files
246 // kind of race here - might get duplicate files
248 Inotify
.Subscribe (current_dir
.FullName
, OnInotifyEvent
,
249 Inotify
.EventType
.Create
| Inotify
.EventType
.MovedTo
);
250 file_enumerator
= DirectoryWalker
.GetFileInfos (current_dir
).GetEnumerator ();
252 current_file
= (FileInfo
) file_enumerator
.Current
;
253 //if (!IsUpToDate (current_file.FullName))
254 // Logger.Log.Debug (current_file.FullName + " is not upto date");
// Skip files that IsUpToDate reports as already up to date.
255 } while (IsUpToDate (current_file
.FullName
));
// Read-only status text: formats "Done" when current_file is null, otherwise
// the full path of current_file, into the "KonquerorQueryable: Indexing" message.
// NOTE(review): the string literals below are broken across lines in this
// extract; they are left exactly as found.
260 public string StatusName
{
261 get { return String.Format ("KonquerorQueryable: Indexing {0}
", (current_file == null ? "Done
" : current_file.FullName)); }
264 public void PostFlushHook ()