4 // Copyright (C) 2005 Debajyoti Bera
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Threading
;
35 namespace Beagle
.Daemon
.KonqQueryable
{
37 [QueryableFlavor (Name
="KonquerorHistory", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
38 public class KonqQueryable
: LuceneFileQueryable
, IIndexableGenerator
{
// Path of the Konqueror HTTP cache directory; assigned in the constructor.
40 string konq_cache_dir
;
// Enumerator over the cache's subdirectories. Starts out null and is assigned
// from DirectoryWalker.GetDirectoryInfos (konq_cache_dir) before a crawl task is queued.
41 private IEnumerator directory_enumerator
= null;
// Number of seconds CrawlHook adds to the current time when it reschedules itself.
42 private int polling_interval_in_seconds
= 300; // 5 min
44 // ISO-Latin1 is 28591
// Encoding handed to the StreamReader that reads cache files in FileToIndexable.
45 private Encoding latin_encoding
= Encoding
.GetEncoding (28591);
// Constructor: passes "KonqHistoryIndex" to the base class and computes the cache
// location as <tmpdir>/kdecache-<Environment.UserName>/http, where tmpdir is read
// from the KDEVARTMP environment variable. The resulting path is logged at debug level.
// NOTE(review): in this listing the block comment below is never closed and the
// statement guarded by the null/empty check on tmpdir is not visible (the embedded
// source numbering skips lines 51 and 56-57). Presumably the missing statement assigns
// a fallback such as /var/tmp, as the block comment suggests -- confirm against the original file.
47 public KonqQueryable () : base ("KonqHistoryIndex")
49 /* How to determine kio-http cache location ?
50 * From KDE web-page it looks like /var/tmp/kdecache-$USERNAME/http
52 //Now we use the $KDEVARTMP env variable
53 string tmpdir
= Environment
.GetEnvironmentVariable ("KDEVARTMP");
55 if (tmpdir
== null || tmpdir
== "")
// Per-user cache root first, then its "http" subdirectory.
58 konq_cache_dir
= Path
.Combine (tmpdir
, "kdecache-" + Environment
.UserName
);
59 konq_cache_dir
= Path
.Combine (konq_cache_dir
, "http");
60 Log
.Debug ("KonqCacheDir: " + konq_cache_dir
);
63 /////////////////////////////////////////////////
// Start override: wraps StartWorker in a ThreadStart and hands it to
// ExceptionHandlingThread.Start, so the setup work is not done inline here.
// NOTE(review): source line 67 is absent from this listing, so whether the
// override also calls base.Start () cannot be confirmed from here.
65 public override void Start ()
68 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker started from Start (): checks that the cache directory exists, sets up
// change monitoring for it, logs the backend start, and queues crawl work.
// NOTE(review): several lines of this method are missing from the listing
// (closing braces, the statement inside the first if, and an apparent else);
// the notes below mark the places where the visible text is incomplete.
71 private void StartWorker ()
73 if (!Directory
.Exists (konq_cache_dir
)) {
74 // if the directory is not present, user is not running KDE
75 // no need to periodically check
76 //GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
// NOTE(review): presumably the method returns here; the statement is not visible.
80 if (Inotify
.Enabled
) {
81 // watch konq_cache_dir for new directory creations
82 Inotify
.EventType mask
= Inotify
.EventType
.Create
;
83 Inotify
.Subscribe (konq_cache_dir
, OnInotifyEvent
, mask
);
// Scheduler task driven by CrawlHook, tagged and sourced to this backend.
// NOTE(review): presumably this is the non-inotify (polling) branch; the
// "else" separating it from the subscription above is not visible -- confirm.
85 Scheduler
.Task crawl_task
= Scheduler
.TaskFromHook (new Scheduler
.TaskHook (CrawlHook
));
86 crawl_task
.Tag
= "Crawling konqueror webcache";
87 crawl_task
.Source
= this;
88 ThisScheduler
.Add (crawl_task
);
91 Log
.Info ("Starting Konq history backend ...");
// NOTE(review): crawl_task is declared a second time below, so the following
// statements presumably belong to a separate method whose header (source lines
// 92-96) was dropped from this listing -- confirm against the original file.
// They reset directory_enumerator to walk the cache's subdirectories and queue
// an add-task built from this object, tagged with crawler_tag.
97 directory_enumerator
= DirectoryWalker
.GetDirectoryInfos (konq_cache_dir
).GetEnumerator ();
98 Scheduler
.Task crawl_task
= NewAddTask (this);
99 crawl_task
.Tag
= crawler_tag
;
100 ThisScheduler
.Add (crawl_task
);
// Tag assigned to the crawl add-task and looked up by CrawlHook via ContainsByTag.
103 private string crawler_tag
= "Konqueror History Crawler";
// Scheduler hook: tests whether a task tagged crawler_tag is currently known to
// ThisScheduler, then marks itself for rescheduling polling_interval_in_seconds
// from the current local time.
// NOTE(review): the body of the if (source lines 107-109) is missing from this
// listing; presumably it starts a new crawl when none is queued -- confirm.
104 private void CrawlHook (Scheduler
.Task task
)
106 if (!ThisScheduler
.ContainsByTag (crawler_tag
)) {
// Re-arm the hook so it fires again after the polling interval.
110 task
.Reschedule
= true;
111 task
.TriggerTime
= DateTime
.Now
.AddSeconds (polling_interval_in_seconds
);
// Tests whether konq_cache_dir exists and returns a bool. Its only visible
// reference is the commented-out GLib.Timeout registration in StartWorker.
// NOTE(review): everything after the existence test (source lines 117-123),
// including both return statements, is missing from this listing.
114 private bool CheckForExistence ()
116 if (!Directory
.Exists (konq_cache_dir
))
124 /////////////////////////////////////////////////
126 // Modified/Created event using Inotify
// Inotify callback: events without the IsDirectory flag are sent to
// IndexSingleFile with the combined path; events with the flag trigger a new
// subscription for CloseWrite events.
// NOTE(review): "path" and "subitem" are used below but their parameter
// declarations (source lines 129-131) are missing from this listing, as are
// source lines 133-136 at the start of the body.
128 private void OnInotifyEvent (Inotify
.Watch watch
,
132 Inotify
.EventType type
)
137 // Watch konq_cache_dir for new directory creation
138 // Watch its subdirectories for new file creation
139 // If any file is created in konq_cache_dir, ignore it
140 // It's a Konq error otherwise
141 if ((type
& Inotify
.EventType
.IsDirectory
) == 0)
142 IndexSingleFile (Path
.Combine (path
, subitem
));
// The condition below is the exact negation of the one above, so it always
// holds when this branch is reached.
// NOTE(review): the subscription is made on konq_cache_dir itself, not on the
// newly reported directory (path + subitem), which does not match the "watch
// its subdirectories" comment above -- confirm whether that is intended.
143 else if ((type
& Inotify
.EventType
.IsDirectory
) != 0)
144 Inotify
.Subscribe (konq_cache_dir
, OnInotifyEvent
, Inotify
.EventType
.CloseWrite
);
// Converts one cache file into an Indexable via FileToIndexable and queues an
// add-task for it at Immediate priority with sub-priority 0.
// NOTE(review): the statements guarded by the two if tests (source lines 150
// and 153) and source line 156 are missing from this listing; presumably both
// guards return early (".new" files and files FileToIndexable rejects) -- confirm.
147 void IndexSingleFile (string path
)
149 if (path
.EndsWith (".new"))
151 Indexable indexable
= FileToIndexable (path
);
152 if (indexable
== null)
154 Scheduler
.Task task
= NewAddTask (indexable
);
155 task
.Priority
= Scheduler
.Priority
.Immediate
;
157 task
.SubPriority
= 0;
158 ThisScheduler
.Add (task
);
161 /////////////////////////////////////////////////
// Builds an Indexable for one Konqueror cache file, or returns null when the
// file is rejected by KonqHistoryUtil.ShouldIndex or has an empty url.
// path: full path of the cache file to examine.
// NOTE(review): this listing is missing the declarations of "stream" and "url",
// the opening "try", the remaining arguments of the ShouldIndex call (source
// lines 180-184), the statements after the https check and the final return.
// The notes below describe only what is visible.
163 private Indexable
FileToIndexable (string path
) {
164 //Logger.Log.Debug ("KonqQ: Trying to index " + path);
// Open read-only while still allowing other readers of the same file.
168 stream
= new FileStream (path
, FileMode
.Open
, FileAccess
.Read
, FileShare
.Read
);
169 } catch (FileNotFoundException
) {
170 // that was fast - lost the file
// Cache files are read with the Latin-1 encoding held in latin_encoding.
174 using (StreamReader reader
= new StreamReader (stream
, latin_encoding
)) {
176 string creation_date
= null;
177 string mimetype
= null;
178 string charset
= null;
// NOTE(review): presumably ShouldIndex fills url, creation_date, mimetype and
// charset through out/ref arguments; the argument list is cut off here.
179 bool is_ok
= KonqHistoryUtil
.ShouldIndex (reader
,
185 if (!is_ok
|| url
== String
.Empty
) {
186 //Logger.Log.Debug ("KonqQ: Skipping non-html file " + path + " of type=" + mimetype);
187 // finding out if a cache file should be indexed is expensive
188 // so, soon after we run the test, write lastwritetime attribute
189 FileAttributesStore
.AttachLastWriteTime (path
, DateTime
.UtcNow
);
190 return null; // we won't index bad files and non-html files
193 Logger
.Log
.Debug ("KonqQ: Indexing " + path
+ " with url=" + url
);
// NOTE(review): the two-argument Uri (string, bool) constructor is marked
// obsolete in current .NET documentation -- confirm before porting.
194 Uri uri
= new Uri (url
, true);
// NOTE(review): only the error log is visible for https URIs; whether the
// method then bails out (source lines 197-199) cannot be seen here.
195 if (uri
.Scheme
== Uri
.UriSchemeHttps
) {
196 Logger
.Log
.Error ("Indexing secure https:// URIs is not secure!");
200 Indexable indexable
= new Indexable (uri
);
201 indexable
.HitType
= "WebHistory";
202 indexable
.MimeType
= KonqHistoryUtil
.KonqCacheMimeType
;
203 // store www.beaglewiki.org as www beagle org, till inpath: query is implemented
204 indexable
.AddProperty (Property
.NewUnstored ("fixme:urltoken", StringFu
.UrlFuzzyDivide (url
)));
205 // hint for the filter about the charset
206 indexable
.AddProperty (Property
.NewUnsearched (StringFu
.UnindexedNamespace
+ "charset", charset
));
// Timestamp = DateTimeUtil.UnixToDateTimeUtc (0) plus creation_date seconds.
// Int64.Parse throws if creation_date is null or not an integer string.
208 DateTime date
= DateTimeUtil
.UnixToDateTimeUtc (0);
209 date
= date
.AddSeconds (Int64
.Parse (creation_date
));
210 indexable
.Timestamp
= date
;
// The content is read from the cache file itself, addressed as a file URI.
212 indexable
.ContentUri
= UriFu
.PathToFileUri (path
);
217 // FIXME: Implement removefile - removing files from history doesn't really make sense. Or does it?
219 // ---------------- IIndexableGenerator --------------------------
// File most recently selected by HasNextIndexable; read by GetNextIndexable and StatusName.
220 private FileInfo current_file
;
// Enumerator over the files of the subdirectory currently being crawled;
// null until HasNextIndexable assigns it, and reset to null when crawling is done.
221 private IEnumerator file_enumerator
= null;
// Returns the Indexable built by FileToIndexable for the file currently held in
// current_file (which may itself be null if the file is rejected).
// NOTE(review): the statement guarded by the null test (source line 226) is
// missing from this listing; presumably it returns null -- confirm.
223 public Indexable
GetNextIndexable ()
225 if (current_file
== null)
227 return FileToIndexable (current_file
.FullName
);
// Advances current_file to the next cache file for which IsUpToDate is false,
// walking the files of each subdirectory in turn and moving to the next
// subdirectory when the current one is exhausted.
// NOTE(review): the opening "do {" matching the trailing "} while", the closing
// braces and every return statement (source lines 231-232, 237-239, 248,
// 253-255) are missing from this listing, so the returned values are not visible.
230 public bool HasNextIndexable ()
// Keep pulling subdirectories until one yields a file.
233 while (file_enumerator
== null || ! file_enumerator
.MoveNext ()) {
// No subdirectories left: the crawl is finished.
234 if (! directory_enumerator
.MoveNext ()) {
235 Logger
.Log
.Debug ("KonqQ: Crawling done");
236 file_enumerator
= null;
240 DirectoryInfo current_dir
= (DirectoryInfo
)directory_enumerator
.Current
;
241 //Logger.Log.Debug ("Trying dir:" + current_dir.Name);
242 // start watching for new files and get the list of current files
243 // kind of race here - might get duplicate files
245 Inotify
.Subscribe (current_dir
.FullName
, OnInotifyEvent
,
246 Inotify
.EventType
.Create
| Inotify
.EventType
.MovedTo
);
247 file_enumerator
= DirectoryWalker
.GetFileInfos (current_dir
).GetEnumerator ();
249 current_file
= (FileInfo
) file_enumerator
.Current
;
250 //if (!IsUpToDate (current_file.FullName))
251 // Logger.Log.Debug (current_file.FullName + " is not up to date");
// Files already up to date are skipped by looping again.
252 } while (IsUpToDate (current_file
.FullName
));
// Status text for this generator: formats the full name of current_file into
// the message, or the "Done" text when current_file is null.
// NOTE(review): the string literals below are broken across lines in this
// listing and the property's closing brace is not visible; in the original file
// the getter is presumably a single line -- confirm.
257 public string StatusName
{
258 get { return String.Format ("KonquerorQueryable: Indexing {0}
", (current_file == null ? "Done
" : current_file.FullName)); }
261 public void PostFlushHook ()