4 // Copyright (C) 2005 Debajyoti Bera
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Threading
;
35 namespace Beagle
.Daemon
.KonqQueryable
{
37 [QueryableFlavor (Name
="KonquerorHistory", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
38 public class KonqQueryable
: LuceneFileQueryable
, IIndexableGenerator
{
// Logger instance obtained under the name "KonqQueryable".
private static Logger log = Logger.Get ("KonqQueryable");

// Root of Konqueror's kio-http cache; assigned in the constructor.
string konq_cache_dir;
// Enumerator over the directories found under konq_cache_dir; created when
// a crawl is set up and advanced by HasNextIndexable.
private IEnumerator directory_enumerator = null;
// Delay CrawlHook uses when it reschedules itself.
private int polling_interval_in_seconds = 300; // 5 min

// ISO-Latin1 is code page 28591; FileToIndexable reads cache files with it.
private Encoding latin_encoding = Encoding.GetEncoding (28591);
// Passes the index name "KonqHistoryIndex" to the base class and records
// where the current user's Konqueror HTTP cache is expected to live.
public KonqQueryable () : base ("KonqHistoryIndex")
{
	/* How to determine the kio-http cache location?
	 * From the KDE web page it looks like /var/tmp/kdecache-$USERNAME/http
	 */
	konq_cache_dir = "/var/tmp/kdecache-" + System.Environment.UserName + "/http";
}
57 /////////////////////////////////////////////////
// Starts the backend by handing StartWorker, wrapped in a ThreadStart
// delegate, to ExceptionHandlingThread.Start (presumably so the set-up runs
// on its own thread).
// NOTE(review): this view is missing lines between the signature and the
// call below (the opening brace and possibly another statement) -- confirm
// against the complete file before editing.
59 public override void Start ()
62 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker passed to ExceptionHandlingThread by Start(): sets up monitoring of
// konq_cache_dir and queues a crawl of what is already in the cache.
// NOTE(review): structural lines (braces, `else`, `return`, and possibly a
// method header) are missing from this view, so the nesting described in the
// comments below is inferred and must be confirmed against the full file.
65 private void StartWorker ()
// Nothing to monitor when the cache directory does not exist. The body of
// this branch is not visible here; per the comments it does not arrange a
// later re-check (the GLib timeout is commented out).
67 if (!Directory
.Exists (konq_cache_dir
)) {
68 // if the directory is not present, user is not running KDE
69 // no need to periodically check
70 //GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
// With inotify available, subscribe OnInotifyEvent to Create events on the
// cache root.
74 if (Inotify
.Enabled
) {
75 // watch konq_cache_dir for new directory creations
76 Inotify
.EventType mask
= Inotify
.EventType
.Create
;
77 Inotify
.Subscribe (konq_cache_dir
, OnInotifyEvent
, mask
);
// Scheduler task built from CrawlHook, tagged and attributed to this backend.
// NOTE(review): presumably the `else` branch of the inotify check (polling
// fallback) -- the `} else {` line is not visible here.
79 Scheduler
.Task crawl_task
= Scheduler
.TaskFromHook (new Scheduler
.TaskHook (CrawlHook
));
80 crawl_task
.Tag
= "Crawling konqueror webcache";
81 crawl_task
.Source
= this;
82 ThisScheduler
.Add (crawl_task
);
85 log
.Info ("Starting Konq history backend ...");
// Crawl set-up: mark the backend as Crawling, enumerate the directories under
// konq_cache_dir, queue an add-task built from this object and tagged with
// crawler_tag, then set the state back to Idle right after queuing.
// NOTE(review): crawl_task is declared a second time here and CrawlHook's
// `if` has no visible body, so these statements presumably form a separate
// crawl method whose header is missing from this view -- confirm.
91 State
= QueryableState
.Crawling
;
92 directory_enumerator
= DirectoryWalker
.GetDirectoryInfos (konq_cache_dir
).GetEnumerator ();
93 Scheduler
.Task crawl_task
= NewAddTask (this);
94 crawl_task
.Tag
= crawler_tag
;
95 ThisScheduler
.Add (crawl_task
);
96 State
= QueryableState
.Idle
;
// Tag given to the crawl add-task; CrawlHook looks it up in the scheduler.
99 private string crawler_tag
= "Konqueror History Crawler";
// Scheduler hook for the periodic-crawl task: checks whether a task tagged
// crawler_tag is present in the scheduler, then reschedules itself
// polling_interval_in_seconds from the current local time.
100 private void CrawlHook (Scheduler
.Task task
)
// NOTE(review): the body of this `if` is not visible in this view;
// presumably it starts a new crawl when none is queued -- confirm.
102 if (!ThisScheduler
.ContainsByTag (crawler_tag
)) {
// Ask the scheduler to run this hook again after the polling interval.
106 task
.Reschedule
= true;
107 task
.TriggerTime
= DateTime
.Now
.AddSeconds (polling_interval_in_seconds
);
// Tests whether konq_cache_dir exists. In the visible code it is referenced
// only from the commented-out GLib.Timeout registration in StartWorker.
// NOTE(review): the return statements (and anything done once the directory
// appears) are missing from this view -- confirm against the full file.
110 private bool CheckForExistence ()
112 if (!Directory
.Exists (konq_cache_dir
))
120 /////////////////////////////////////////////////
122 // Modified/Created event using Inotify
// Inotify callback: a non-directory event is sent to IndexSingleFile, while a
// directory event triggers a further Inotify.Subscribe call.
// NOTE(review): `path` and `subitem` are used below but their parameter
// declarations are missing from this view, as is the opening of the body.
124 private void OnInotifyEvent (Inotify
.Watch watch
,
128 Inotify
.EventType type
)
133 // Watch konq_cache_dir for new directory creation
134 // Watch its subdirectories for new file creation
135 // If any file is created in konq_cache_dir, ignore it
136 // It's a Konq error otherwise
// NOTE(review): the visible code does not check whether the file sits
// directly in konq_cache_dir, so the "ignore it" rule above is not enforced
// by the lines shown here -- confirm.
137 if ((type
& Inotify
.EventType
.IsDirectory
) == 0)
138 IndexSingleFile (Path
.Combine (path
, subitem
));
// This condition is the exact complement of the one above, so it always
// holds when reached.
// NOTE(review): the subscription targets konq_cache_dir itself rather than
// the newly created directory (path + subitem) -- looks unintended; confirm.
139 else if ((type
& Inotify
.EventType
.IsDirectory
) != 0)
140 Inotify
.Subscribe (konq_cache_dir
, OnInotifyEvent
, Inotify
.EventType
.CloseWrite
);
// Builds an Indexable for one cache file via FileToIndexable and queues it
// with the scheduler at Immediate priority and sub-priority 0.
// NOTE(review): the statements following the two `if` checks are missing
// from this view; presumably both are early returns (skip "*.new" files and
// files FileToIndexable rejected) -- confirm against the full file.
143 void IndexSingleFile (string path
)
// Special-cases paths ending in ".new".
145 if (path
.EndsWith (".new"))
147 Indexable indexable
= FileToIndexable (path
);
// FileToIndexable returns null for files it decides not to index.
148 if (indexable
== null)
150 Scheduler
.Task task
= NewAddTask (indexable
);
151 task
.Priority
= Scheduler
.Priority
.Immediate
;
153 task
.SubPriority
= 0;
154 ThisScheduler
.Add (task
);
157 /////////////////////////////////////////////////
// Converts one Konqueror cache file into an Indexable, or returns null when
// the file should not be indexed.
// NOTE(review): several lines are missing from this view -- the `try`, the
// declarations of `stream` and `url`, the remaining arguments of
// ShouldIndex, what follows the https check, and the final return. The
// comments below describe only what the visible lines establish.
159 private Indexable
FileToIndexable (string path
) {
160 //Logger.Log.Debug ("KonqQ: Trying to index " + path);
// Open the cache file read-only, allowing other readers.
164 stream
= new FileStream (path
, FileMode
.Open
, FileAccess
.Read
, FileShare
.Read
);
165 } catch (FileNotFoundException
) {
166 // that was fast - lost the file
// The file is decoded with latin_encoding.
// NOTE(review): no Close/Dispose of reader or stream is visible in this
// view -- confirm the stream is released on every path.
170 StreamReader reader
= new StreamReader (stream
, latin_encoding
);
172 string creation_date
= null;
173 string mimetype
= null;
174 string charset
= null;
// KonqHistoryUtil.ShouldIndex decides whether the file is indexable;
// presumably it also fills url, creation_date, mimetype and charset through
// arguments that are not visible here.
175 bool is_ok
= KonqHistoryUtil
.ShouldIndex (reader
,
181 if (!is_ok
|| url
== String
.Empty
) {
182 //Logger.Log.Debug ("KonqQ: Skipping non-html file " + path + " of type=" + mimetype);
183 // finding out if a cache file should be indexed is expensive
184 // so, soon after we run the test, write lastwritetime attribute
185 FileAttributesStore
.AttachLastWriteTime (path
, DateTime
.UtcNow
);
186 return null; // we won't index bad files and non-html files
189 Logger
.Log
.Debug ("KonqQ: Indexing " + path
+ " with url=" + url
);
// Uri(string, bool) overload; the second argument is dontEscape.
190 Uri uri
= new Uri (url
, true);
// https URLs are reported at Error level.
// NOTE(review): whether they are then skipped is not visible in this view.
191 if (uri
.Scheme
== Uri
.UriSchemeHttps
) {
192 Logger
.Log
.Error ("Indexing secure https:// URIs is not secure!");
// The Indexable is keyed by the original web URL, not by the cache path.
196 Indexable indexable
= new Indexable (uri
);
197 indexable
.HitType
= "WebHistory";
198 indexable
.MimeType
= KonqHistoryUtil
.KonqCacheMimeType
;
199 // store www.beaglewiki.org as www beagle org, till inpath: query is implemented
200 indexable
.AddProperty (Property
.NewUnstored ("fixme:urltoken", StringFu
.UrlFuzzyDivide (url
)));
201 // hint for the filter about the charset
202 indexable
.AddProperty (Property
.NewUnsearched (StringFu
.UnindexedNamespace
+ "charset", charset
));
// creation_date is parsed as a whole number of seconds counted from
// 1970-01-01; Int64.Parse throws if the text is null or not numeric.
204 DateTime date
= new DateTime (1970, 1, 1);
205 date
= date
.AddSeconds (Int64
.Parse (creation_date
));
206 indexable
.Timestamp
= date
;
// The content itself is read from the cache file on disk.
208 indexable
.ContentUri
= UriFu
.PathToFileUri (path
);
212 // FIXME: Implement RemoveFile? Removing files from history doesn't obviously make sense -- does it?
214 // ---------------- IIndexableGenerator --------------------------
// Crawl cursor shared by HasNextIndexable and GetNextIndexable.
// Cache file most recently selected by HasNextIndexable.
private FileInfo current_file;
// Enumerator over the files of the directory currently being crawled.
private IEnumerator file_enumerator = null;
// Returns the Indexable for the file selected by the last HasNextIndexable
// call, built by FileToIndexable (which may itself return null).
// NOTE(review): the statement following the null check is missing from this
// view; presumably it returns null when no file is selected -- confirm.
218 public Indexable
GetNextIndexable ()
220 if (current_file
== null)
222 return FileToIndexable (current_file
.FullName
);
// Advances the crawl cursor to the next cache file for which IsUpToDate is
// false, moving directory by directory through directory_enumerator.
// NOTE(review): the `do {` that opens the outer loop and the return/break
// statements are missing from this view; the closing `} while (IsUpToDate …)`
// shows the outer loop repeats while the selected file is already up to date.
225 public bool HasNextIndexable ()
// Inner loop: runs while there is no file enumerator yet or the current one
// is exhausted, moving on to the next directory each time.
228 while (file_enumerator
== null || ! file_enumerator
.MoveNext ()) {
// No directories left: log completion and drop the file enumerator.
// NOTE(review): what is returned here, and whether current_file is cleared,
// is not visible in this view.
229 if (! directory_enumerator
.MoveNext ()) {
230 Logger
.Log
.Debug ("KonqQ: Crawling done");
231 file_enumerator
= null;
235 DirectoryInfo current_dir
= (DirectoryInfo
)directory_enumerator
.Current
;
236 //Logger.Log.Debug ("Trying dir:" + current_dir.Name);
237 // start watching for new files and get the list of current files
238 // kind of race here - might get duplicate files
// NOTE(review): a line is missing just before this call in this view; it may
// be a guard such as an Inotify.Enabled check -- confirm.
240 Inotify
.Subscribe (current_dir
.FullName
, OnInotifyEvent
,
241 Inotify
.EventType
.Create
| Inotify
.EventType
.MovedTo
);
242 file_enumerator
= DirectoryWalker
.GetFileInfos (current_dir
).GetEnumerator ();
// Record the file the enumerator now points at; GetNextIndexable reads it.
244 current_file
= (FileInfo
) file_enumerator
.Current
;
245 //if (!IsUpToDate (current_file.FullName))
246 // Logger.Log.Debug (current_file.FullName + " is not upto date");
247 } while (IsUpToDate (current_file
.FullName
));
// Human-readable crawl status: names the file currently selected by the
// crawl, or "Done" when no file is selected.
public string StatusName {
	get { return String.Format ("KonquerorQueryable: Indexing {0}", (current_file == null ? "Done" : current_file.FullName)); }
}
256 public void PostFlushHook ()