2 // AkregatorQueryable.cs
4 // Copyright (C) 2005 Debajyoti Bera
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Threading
;
32 using System
.Xml
.Serialization
;
33 using System
.Globalization
;
38 namespace Beagle
.Daemon
.AkregatorQueryable
{
40 [QueryableFlavor (Name
="Akregator", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
41 public class AkregatorQueryable
: LuceneFileQueryable
{
// Backing field and accessor for an XmlSerializer bound to the Item type.
// FeedIndexableGenerator reads the Serializer property in its constructor.
45 // construct a serializer and keep it handy for indexablegenerator to use
46 private XmlSerializer serializer
= null;
// Creates the serializer the first time it is needed, i.e. while the
// backing field is still null.
47 public XmlSerializer Serializer
{
49 if (serializer
== null)
50 serializer
= new XmlSerializer (typeof (Item
));
55 // store the file size indexed by the filenames
56 // akregator unnecessarily saves files
// Keys are file names, values are the sizes passed to SetFileSize.
// The table is created in the constructor.
57 private Hashtable file_sizes
;
// Returns the size previously recorded for `name`.
58 public long GetFileSize (string name
)
// Nothing recorded for this name; the statement handling this case is
// not visible in this view.
60 if (! file_sizes
.Contains (name
))
// Values are stored as boxed longs by SetFileSize, hence the unboxing cast.
62 return (long)file_sizes
[name
];
// Records (or overwrites) the size remembered for the file called `name`.
// GetFileSize reads the value back and unboxes it as a long.
public void SetFileSize (string name, long size)
{
	object boxed_size = size;
	file_sizes [name] = boxed_size;
}
// Index format history:
//   v1: property names changed to DC names,
//       feed_file stored as ParentUri
//   v2: dc:date removed, the Timestamp property is used instead.
private const int INDEX_VERSION = 2;

// Registers the backend under the index name "AkregatorIndex", creates the
// empty file-size table and works out the Akregator archive directory:
// <PathFinder.HomeDir>/.kde/share/apps/akregator/Archive
public AkregatorQueryable () : base ("AkregatorIndex", INDEX_VERSION)
{
	file_sizes = new Hashtable ();

	string[] components = new string[] { ".kde", "share", "apps", "akregator", "Archive" };
	string archive_path = PathFinder.HomeDir;
	foreach (string component in components)
		archive_path = Path.Combine (archive_path, component);

	akregator_dir = archive_path;
}
86 /////////////////////////////////////////////////
// Start-up entry point overridden from the base class.
// Wraps StartWorker in a ThreadStart delegate and hands it to
// ExceptionHandlingThread.Start, so the directory scan does not run inline
// here (presumably on a separate thread -- confirm against
// ExceptionHandlingThread).
88 public override void Start ()
92 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Sets up change notification for the Akregator archive directory and
// queues every existing *.xml feed file for indexing.
// NOTE(review): several statements of this method (the declaration of
// `count`, branch endings, early returns) are not visible in this view.
95 private void StartWorker ()
// Archive directory missing: register CheckForExistence as a GLib timeout
// handler with an interval of 60000 (milliseconds, per GLib.Timeout.Add).
97 if (!Directory
.Exists (akregator_dir
)) {
98 GLib
.Timeout
.Add (60000, new GLib
.TimeoutHandler (CheckForExistence
));
// With inotify available, watch for files that were closed after writing
// and for deletions; both are routed to OnInotifyEvent.
102 if (Inotify
.Enabled
) {
103 Inotify
.EventType mask
= Inotify
.EventType
.CloseWrite
104 | Inotify
.EventType
.Delete
;
106 Inotify
.Subscribe (akregator_dir
, OnInotifyEvent
, mask
);
// FileSystemWatcher set-up: Changed and Created both go to OnChanged.
// NOTE(review): presumably the fallback branch used when inotify is not
// enabled -- the branch structure is not visible here, confirm.
108 FileSystemWatcher fsw
= new FileSystemWatcher ();
109 fsw
.Path
= akregator_dir
;
111 fsw
.Changed
+= new FileSystemEventHandler (OnChanged
);
112 fsw
.Created
+= new FileSystemEventHandler (OnChanged
);
114 fsw
.EnableRaisingEvents
= true;
117 Log
.Info ("Scanning Akregator feeds...");
// The stopwatch is printed in the summary log line below.
119 Stopwatch stopwatch
= new Stopwatch ();
122 DirectoryInfo dir
= new DirectoryInfo (akregator_dir
);
// Initial scan: every file with the ".xml" extension is handed to
// IndexSingleFeed with initial_scan = true.
124 foreach (FileInfo file
in DirectoryWalker
.GetFileInfos (dir
)) {
125 if (file
.Extension
== ".xml") {
126 IndexSingleFeed (file
.FullName
, true);
132 Log
.Info ("{0} files will be parsed (scanned in {1})", count
, stopwatch
);
// GLib timeout handler that StartWorker registers while the archive
// directory does not exist; it re-tests for the directory.
// NOTE(review): the return statements are not visible in this view. For a
// GLib.TimeoutHandler the return value decides whether the timeout is
// rescheduled -- confirm which branch returns what.
135 private bool CheckForExistence ()
137 if (!Directory
.Exists (akregator_dir
))
145 /////////////////////////////////////////////////
147 // Modified/Created event using Inotify
// Inotify callback subscribed in StartWorker for CloseWrite and Delete
// events on the archive directory.
// NOTE(review): the parameter lines declaring `path` and `subitem` are not
// visible in this view; both are used below.
149 private void OnInotifyEvent (Inotify
.Watch watch
,
153 Inotify
.EventType type
)
// Filter: events with an empty subitem or a name not ending in ".xml".
155 if (subitem
== "" || !subitem
.EndsWith (".xml"))
// A file closed after writing is (re)indexed, not as part of the initial scan.
158 if ((type
& Inotify
.EventType
.CloseWrite
) != 0)
159 IndexSingleFeed (Path
.Combine (path
, subitem
), false);
// A deleted file is scheduled for removal from the index.
160 else if ((type
& Inotify
.EventType
.Delete
) != 0)
161 RemoveFeedFile (Path
.Combine (path
, subitem
));
// Modified/Created event using FSW
// Handler attached to the FileSystemWatcher's Changed and Created events:
// the affected file is queued for indexing as a non-initial scan.
private void OnChanged (object o, FileSystemEventArgs args)
{
	string changed_path = args.FullPath;
	IndexSingleFeed (changed_path, false);
}
171 /////////////////////////////////////////////////
173 // Parse and index a single feed
// filename:     path of the feed file to index
// initial_scan: true when called from the start-up directory scan, false
//               when called from a change notification; passed through to
//               the FeedIndexableGenerator.
// NOTE(review): the declaration of `task` and the early-exit statements
// are not visible in this view.
175 private void IndexSingleFeed (string filename
, bool initial_scan
) {
// Only ".xml" files are feed archives.
176 if (! filename
.EndsWith (".xml"))
// Skip when the scheduler already holds a task tagged with this file name.
178 if (ThisScheduler
.ContainsByTag (filename
)) {
179 Log
.Debug ("Not adding task for already running task: {0}", filename
);
// The generator yields the feed's items; it is wrapped in an add-task
// and handed to the scheduler.
183 FeedIndexableGenerator generator
= new FeedIndexableGenerator (this, filename
, initial_scan
);
185 task
= NewAddTask (generator
);
187 ThisScheduler
.Add (task
);
// Queues a removal task for a feed file that no longer exists.
// The task is keyed by the file's file: URI, gets Immediate priority with
// sub-priority 0, and is handed to this backend's scheduler.
private void RemoveFeedFile (string file)
{
	Log.Debug ("Removing Akregator feedfile:" + file);

	Scheduler.Task removal = NewRemoveTask (UriFu.PathToFileUri (file));
	removal.Priority = Scheduler.Priority.Immediate;
	removal.SubPriority = 0;

	ThisScheduler.Add (removal);
}
202 * Indexable generator for Akregator Feeds
204 public class FeedIndexableGenerator
: IIndexableGenerator
{
// Path of the feed file this generator reads; ReadFeedHeader opens it with
// an XmlTextReader.
205 private string feed_file
;
// Backend that created this generator. Used for the shared serializer, the
// file attributes store and the file-size table.
206 private AkregatorQueryable queryable
;
// XML reader over feed_file. HasNextIndexable checks it for null before use.
208 private XmlTextReader reader
;
// Set to false when the file is already up to date, when parsing fails, or
// when no further item could be read; returned by HasNextIndexable.
209 private bool is_valid_file
= true;
// Copy of the constructor argument of the same name.
210 private bool initial_scan
= false;
// Text of the channel's <title>, <link> and <description> elements, read by
// ReadFeedHeader.
212 private string channel_title
;
213 private string channel_link
;
214 private string channel_description
;
// Item most recently deserialized by HasNextIndexable.
216 private Item current_item
;
// Serializer for Item, taken from queryable.Serializer in the constructor.
217 private XmlSerializer serializer
;
// Creates a generator for one feed file.
// queryable:    owning backend; its Serializer is reused for every item
// feed_file:    path of the feed file to read
// initial_scan: whether this file is being indexed as part of the start-up scan
// NOTE(review): no call to ReadFeedHeader is visible in this view; confirm
// where the header is read before HasNextIndexable is first used.
219 public FeedIndexableGenerator (AkregatorQueryable queryable
, string feed_file
, bool initial_scan
)
221 this.queryable
= queryable
;
222 this.feed_file
= feed_file
;
223 this.serializer
= queryable
.Serializer
;
224 this.initial_scan
= initial_scan
;
// Parameterless hook with no return value; its body is not visible in this view.
// NOTE(review): presumably part of IIndexableGenerator and invoked after
// generated indexables have been flushed -- confirm against the interface.
228 public void PostFlushHook ()
// Status label for this generator: the path of the feed file it reads.
public string StatusName
{
	get { return this.feed_file; }
}
// Decides whether the feed file at `path` can be skipped. Two checks are
// visible: the file attributes store, then a comparison of the size
// remembered by the queryable with the current size on disk.
// NOTE(review): the return statements and the initial_scan check mentioned
// in the comment below are not visible in this view.
236 private bool IsUpToDate (string path
)
238 // first check the file date
239 if (queryable
.FileAttributesStore
.IsUpToDate (path
))
241 // if not up to date and initial scan, then we should index
244 // next check the size - it's really unlucky if the file has changed
245 // and yet the size is the same
246 // FIXME: Maybe store the md5-hash of the file - that is less expensive
247 // than indexing all the feeds in the file!
248 FileInfo file
= new FileInfo (path
);
// A size that differs from the remembered one means the file has changed.
249 if (queryable
.GetFileSize (path
) != file
.Length
)
// Opens the feed file and reads the channel header (title, link,
// description), leaving the reader positioned for the item elements.
// Sets is_valid_file to false when the file is up to date or an
// XmlException is caught.
// NOTE(review): the try/do/case lines of this method are not visible in
// this view, and no Close() of the XmlTextReader is visible anywhere in
// this class -- confirm the reader is released.
254 private void ReadFeedHeader () {
// Nothing to do for a file that has not changed.
256 if (IsUpToDate (feed_file
)) {
257 is_valid_file
= false;
261 Log
.Debug ("Opening feed file: {0}", feed_file
);
262 reader
= new XmlTextReader (feed_file
);
// Whitespace nodes are not reported, so NodeType checks only see real nodes.
263 reader
.WhitespaceHandling
= WhitespaceHandling
.None
;
265 is_valid_file
= true;
267 // move to beginning of document
268 reader
.MoveToContent();
269 // move to <rss ...> node
270 reader
.ReadStartElement ("rss");
271 // move to <channel> node
272 reader
.ReadStartElement ("channel");
// Loop body: look at the element the reader is positioned on.
277 string elementName
= reader
.Name
;
// The first <item> marks the end of the channel header.
278 if (elementName
== "item")
280 switch (elementName
) {
// <title>: remembered as channel_title
282 reader
.ReadStartElement ("title");
283 channel_title
= reader
.ReadString ();
284 reader
.ReadEndElement ();
// <link>: remembered as channel_link
288 reader
.ReadStartElement ("link");
289 channel_link
= reader
.ReadString ();
290 reader
.ReadEndElement ();
// <description>: remembered as channel_description
294 reader
.ReadStartElement ("description");
295 channel_description
= reader
.ReadString ();
296 reader
.ReadEndElement ();
299 // ignore other elements
// ReadOuterXml consumes the whole element; its result is discarded.
301 reader
.ReadOuterXml ();
304 } while (!reader
.EOF
&& reader
.NodeType
== XmlNodeType
.Element
);
305 } catch (XmlException ex
) {
306 Log
.Warn (ex
, "Caught exception parsing feed file:");
307 is_valid_file
= false;
// Tries to read the next element from the feed and deserialize it into
// current_item. Returns is_valid_file, which is cleared once no item could
// be produced.
// NOTE(review): the try line and the early return are not visible in this view.
312 public bool HasNextIndexable ()
// Nothing to read if the header step rejected the file or never opened it.
315 if (!is_valid_file
|| reader
== null)
317 string itemString
= "";
319 // check if the reader is at the startnode
320 if (reader
.NodeType
== XmlNodeType
.Element
) {
321 itemString
= reader
.ReadOuterXml ();
322 // form node object from the <node>...</node> string
323 // FIXME Deserialize is expensive - remove it altogether
324 current_item
= (Item
) serializer
.Deserialize (new StringReader (itemString
));
// NOTE(review): XmlSerializer.Deserialize is documented to throw
// InvalidOperationException (with the XmlException as InnerException) on
// malformed input, so this handler may not catch deserialization
// failures -- confirm.
326 } catch (XmlException ex
) {
327 // probably no more <item>
// No item available: mark the generator as exhausted.
330 if (current_item
== null) {
331 //Log.Debug ("AkregatorQ: Probably no more feeds left in " + feed_file);
332 //Log.Debug ("Causing string = " + itemString);
334 is_valid_file
= false;
339 return is_valid_file
;
// Remembers the feed file's current length in the owning queryable's
// file-size table, so a later IsUpToDate check can compare against it.
private void StoreFileSize ()
{
	// cache the file size
	long current_length = new FileInfo (feed_file).Length;
	queryable.SetFileSize (feed_file, current_length);
}
// Returns the Indexable for the item read by the last HasNextIndexable
// call, or null when there is no current item or the item carries the
// "deleted" meta flag (Item.IsDeleted).
//
// The original condition was `current_item != null || !current_item.IsDeleted`.
// With `||`, a null current_item was dereferenced (NullReferenceException)
// and a non-null item was converted even when it was marked deleted. Both
// conditions must hold, so the test is now a guard on the negated conjunction.
public Indexable GetNextIndexable ()
{
	if (current_item == null || current_item.IsDeleted)
		return null;

	return current_itemToIndexable ();
}
// Converts current_item into an Indexable.
//
// The indexable's URI is "feed:<channel link>;item=<item link>", its
// ParentUri is the feed file, and the item's description becomes the text
// to index. Title, link, channel link and channel title are stored under
// Dublin Core property names.
//
// Returns null when there is no current item.
private Indexable current_itemToIndexable ()
{
	if (current_item == null)
		return null;

	//Log.Debug ("Indexing " + channel_link + ":" + current_item.Link);
	Indexable indexable = new Indexable (new Uri (String.Format ("feed:{0};item={1}", channel_link, current_item.Link)));
	indexable.ParentUri = UriFu.PathToFileUri (feed_file);
	indexable.MimeType = "text/html";
	indexable.HitType = "FeedItem";

	// Item.PubDate has no default value, so it is null for an item without
	// a <pubDate> element, and DateTime.ParseExact throws for null or for
	// text that does not match the pattern. A bad date should not stop the
	// item from being indexed, so the timestamp is simply left unset then.
	if (current_item.PubDate != null) {
		string RFC822 = "ddd, dd MMM yyyy HH:mm:ss zzz";
		try {
			DateTime date = DateTime.ParseExact (current_item.PubDate, RFC822, DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AdjustToUniversal);
			indexable.Timestamp = date;
		} catch (FormatException) {
			Log.Debug ("Ignoring unparseable pubDate '{0}' in {1}", current_item.PubDate, feed_file);
		}
	}

	// replace property names with Dublin Core names
	indexable.AddProperty (Property.New ("dc:title", current_item.Title));
	indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Link));
	indexable.AddProperty (Property.NewKeyword ("dc:source", channel_link));
	indexable.AddProperty (Property.New ("dc:publisher", channel_title));

	// Named differently from the `reader` field (the feed's XmlTextReader)
	// to avoid shadowing it.
	StringReader description_reader = new StringReader (current_item.Description);
	indexable.SetTextReader (description_reader);

	return indexable;
}
// One <meta> entry of a feed item, as deserialized into Item.MetaList.
387 public class MetaInfo
{
// Value of the entry; Item.IsDeleted compares it with "true".
// NOTE(review): a line before this field is not visible in this view
// (possibly a serialization attribute) -- confirm how it is mapped.
389 public string value = "";
// Mapped to the "type" XML attribute; Item.IsDeleted looks for "deleted".
390 [XmlAttribute ("type")] public string Type
= "";
393 // we will deserialize XML fragments, so there won't be any <? xml ... ?>
// Serialization mapping for one <item> element of a feed (root element
// "item", empty namespace).
394 [System
.Xml
.Serialization
.XmlRoot("item", Namespace
="", IsNullable
=false)]
395 [System
.Xml
.Serialization
.XmlType("item", Namespace
="")]
// <pubDate> text. Unlike the other fields it has no default, so it stays
// null when the element is absent.
397 [XmlElement ("pubDate")] public string PubDate
;
// <title>, <description> and <link> text; each defaults to the empty string.
398 [XmlElement ("title")] public string Title
= "";
399 [XmlElement ("description")] public string Description
="";
400 [XmlElement ("link")] public string Link
="";
// <meta> elements in the "http://foobar" namespace, each deserialized as a
// MetaInfo and collected in a list.
401 [XmlElement ("meta", typeof (MetaInfo
), Namespace
="http://foobar")]
402 public ArrayList MetaList
{
403 get { return metaList; }
404 set { metaList = value; }
// Backing store for MetaList; starts out as an empty list.
406 private ArrayList metaList
= new ArrayList ();
408 public bool IsDeleted
{
410 for (int i
=0; i
<metaList
.Count
; ++i
) {
411 MetaInfo meta
= (MetaInfo
)metaList
[i
];
412 if (meta
.Type
== "deleted" && meta
.value == "true") {