2 // AkregatorQueryable.cs
4 // Copyright (C) 2005 Debajyoti Bera
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Threading
;
32 using System
.Xml
.Serialization
;
33 using System
.Globalization
;
38 namespace Beagle
.Daemon
.AkregatorQueryable
{
/// <summary>
/// Queryable backend for Akregator feed archives.  It watches the archive
/// directory (through Inotify when enabled, otherwise through a
/// FileSystemWatcher), schedules an add task for every ".xml" feed file
/// that is created or changed, and a remove task for every feed file that
/// is deleted.
/// </summary>
[QueryableFlavor (Name="Akregator", Domain=QueryDomain.Local, RequireInotify=false)]
public class AkregatorQueryable : LuceneFileQueryable {

	private static Logger log = Logger.Get ("AkregatorQueryable");

	// <PathFinder.HomeDir>/.kde/share/apps/akregator/Archive
	string akregator_dir;

	// construct a serializer and keep it handy for indexablegenerator to use
	private XmlSerializer serializer = null;

	/// <summary>
	/// Serializer for <c>Item</c>, created on first access and reused afterwards.
	/// </summary>
	public XmlSerializer Serializer {
		get {
			if (serializer == null)
				serializer = new XmlSerializer (typeof (Item));
			return serializer;
		}
	}

	// store the file size indexed by the filenames
	// akregator unnecessarily saves files
	private Hashtable file_sizes;

	/// <summary>
	/// Returns the size recorded for <paramref name="name"/>, or -1 when no
	/// size has been recorded for that file.
	/// </summary>
	public long GetFileSize (string name)
	{
		// Single lookup: an entry removed by another thread between a
		// Contains() call and the indexer would otherwise unbox null.
		object cached = file_sizes [name];
		if (cached == null)
			return -1;
		return (long) cached;
	}

	/// <summary>
	/// Records <paramref name="size"/> as the last indexed size of
	/// <paramref name="name"/>.
	/// </summary>
	public void SetFileSize (string name, long size)
	{
		file_sizes [name] = size;
	}

	// add versioning of index
	// v1: change property names to DC names,
	//	store feed_file as ParentUri
	private const int INDEX_VERSION = 1;

	public AkregatorQueryable () : base ("AkregatorIndex", INDEX_VERSION)
	{
		akregator_dir = Path.Combine (PathFinder.HomeDir, ".kde");
		akregator_dir = Path.Combine (akregator_dir, "share");
		akregator_dir = Path.Combine (akregator_dir, "apps");
		akregator_dir = Path.Combine (akregator_dir, "akregator");
		akregator_dir = Path.Combine (akregator_dir, "Archive");

		// The table is written both by the file watcher callbacks
		// (RemoveFeedFile) and by the feed generators (SetFileSize), so use
		// the synchronized wrapper.
		file_sizes = Hashtable.Synchronized (new Hashtable ());
	}

	/////////////////////////////////////////////////

	/// <summary>
	/// Starts the backend; the directory scan runs on its own thread.
	/// </summary>
	public override void Start ()
	{
		base.Start ();

		ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
	}

	// Sets up change notification for the archive directory and schedules
	// every existing ".xml" feed file for indexing.
	private void StartWorker ()
	{
		if (!Directory.Exists (akregator_dir)) {
			// Nothing to watch yet: look again every 60 seconds.
			GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
			return;
		}

		if (Inotify.Enabled) {
			Inotify.EventType mask = Inotify.EventType.CloseWrite
						| Inotify.EventType.Delete;

			Inotify.Subscribe (akregator_dir, OnInotifyEvent, mask);
		} else {
			FileSystemWatcher fsw = new FileSystemWatcher ();
			fsw.Path = akregator_dir;

			fsw.Changed += new FileSystemEventHandler (OnChanged);
			fsw.Created += new FileSystemEventHandler (OnChanged);
			// Mirror the Inotify Delete handling so removed feeds are
			// dropped from the index on this code path as well.
			fsw.Deleted += new FileSystemEventHandler (OnDeleted);

			fsw.EnableRaisingEvents = true;
		}

		log.Info ("Scanning Akregator feeds...");

		State = QueryableState.Crawling;
		Stopwatch stopwatch = new Stopwatch ();
		stopwatch.Start ();

		DirectoryInfo dir = new DirectoryInfo (akregator_dir);
		int count = 0;
		foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir)) {
			if (file.Extension == ".xml") {
				IndexSingleFeed (file.FullName, true);
				count++;
			}
		}

		State = QueryableState.Idle;
		stopwatch.Stop ();
		log.Info ("{0} files will be parsed (scanned in {1})", count, stopwatch);
	}

	// GLib timeout callback: returns true (keep polling) while the archive
	// directory is missing; once it exists the backend is started and false
	// is returned so the timeout is not rescheduled.
	private bool CheckForExistence ()
	{
		if (!Directory.Exists (akregator_dir))
			return true;

		this.Start ();

		return false;
	}

	/////////////////////////////////////////////////

	// Modified/Created event using Inotify

	private void OnInotifyEvent (Inotify.Watch watch,
				     string path,
				     string subitem,
				     string srcpath,
				     Inotify.EventType type)
	{
		if (subitem == "" || !subitem.EndsWith (".xml"))
			return;

		if ((type & Inotify.EventType.CloseWrite) != 0)
			IndexSingleFeed (Path.Combine (path, subitem), false);
		else if ((type & Inotify.EventType.Delete) != 0)
			RemoveFeedFile (Path.Combine (path, subitem));
	}

	// Modified/Created event using FSW

	private void OnChanged (object o, FileSystemEventArgs args)
	{
		IndexSingleFeed (args.FullPath, false);
	}

	// Deleted event using FSW

	private void OnDeleted (object o, FileSystemEventArgs args)
	{
		// Same ".xml" filter as the Inotify handler.
		if (!args.FullPath.EndsWith (".xml"))
			return;

		RemoveFeedFile (args.FullPath);
	}

	/////////////////////////////////////////////////

	// Parse and index a single feed

	private void IndexSingleFeed (string filename, bool initial_scan) {
		if (! filename.EndsWith (".xml"))
			return;

		// Tasks are tagged with the feed file name, so a feed that is
		// already queued is not queued a second time.
		if (ThisScheduler.ContainsByTag (filename)) {
			Logger.Log.Debug ("Not adding task for already running task: {0}", filename);
			return;
		}

		FeedIndexableGenerator generator = new FeedIndexableGenerator (this, filename, initial_scan);
		Scheduler.Task task;
		task = NewAddTask (generator);
		task.Tag = filename;
		ThisScheduler.Add (task);
	}

	// Schedule removal of a deleted feed file from the index
	private void RemoveFeedFile (string file) {
		Logger.Log.Debug ("Removing Akregator feedfile:" + file);

		// Forget the cached size.  Otherwise a feed that is later re-created
		// with the same byte size would be considered up to date and never
		// be indexed again.
		file_sizes.Remove (file);

		Uri uri = UriFu.PathToFileUri (file);
		Scheduler.Task task = NewRemoveTask (uri);
		task.Priority = Scheduler.Priority.Immediate;
		task.SubPriority = 0;
		ThisScheduler.Add (task);
	}

}
/**
 * Indexable generator for Akregator Feeds
 *
 * Reads one feed file: the channel header (title, link, description) is
 * parsed when the generator is constructed, then each call to
 * HasNextIndexable () deserializes the next element into an Item and
 * GetNextIndexable () converts it into an Indexable.
 */
public class FeedIndexableGenerator : IIndexableGenerator {
	private string feed_file;
	private AkregatorQueryable queryable;

	private XmlTextReader reader;
	private bool is_valid_file = true;
	private bool initial_scan = false;

	private string channel_title;
	private string channel_link;
	private string channel_description;

	private Item current_item;
	private XmlSerializer serializer;

	public FeedIndexableGenerator (AkregatorQueryable queryable, string feed_file, bool initial_scan)
	{
		this.queryable = queryable;
		this.feed_file = feed_file;
		this.serializer = queryable.Serializer;
		this.initial_scan = initial_scan;
		ReadFeedHeader ();
	}

	// Nothing to do after a flush.
	public void PostFlushHook ()
	{
	}

	// The feed file name identifies this generator.
	public string StatusName {
		get { return feed_file; }
	}

	// Decides whether the feed file can be skipped.
	private bool IsUpToDate (string path)
	{
		// first check the file date
		if (queryable.FileAttributesStore.IsUpToDate (path))
			return true;
		// if not up to date and initial scan, then we should index
		if (initial_scan)
			return false;
		// next check the size - its really unlucky if the file is changed
		// and yet the size is same
		// FIXME: Maybe store the md5-hash of the file - that is less expensive
		// than indexing all the feeds in the file!
		FileInfo file = new FileInfo (path);
		if (queryable.GetFileSize (path) != file.Length)
			return false;
		return true;
	}

	// Opens the feed file and reads the channel level elements, leaving the
	// reader positioned on the first <item> (if there is one).  Sets
	// is_valid_file to false when the file is up to date or is not a
	// parsable feed.
	private void ReadFeedHeader () {

		if (IsUpToDate (feed_file)) {
			is_valid_file = false;
			return;
		}
		try {
			Logger.Log.Debug ("Opening feed file: {0}", feed_file);
			reader = new XmlTextReader (feed_file);
			reader.WhitespaceHandling = WhitespaceHandling.None;

			is_valid_file = true;

			// move to beginning of document
			reader.MoveToContent();
			// move to <rss ...> node
			reader.ReadStartElement ("rss");
			// move to <channel> node
			reader.ReadStartElement ("channel");

			do {
				string elementName = reader.Name;
				if (elementName == "item")
					break;
				switch (elementName) {
				case "title":
					reader.ReadStartElement ("title");
					channel_title = reader.ReadString ();
					reader.ReadEndElement ();
					break;

				case "link":
					reader.ReadStartElement ("link");
					channel_link = reader.ReadString ();
					reader.ReadEndElement ();
					break;

				case "description":
					reader.ReadStartElement ("description");
					channel_description = reader.ReadString ();
					reader.ReadEndElement ();
					break;

				// ignore other elements
				default:
					reader.ReadOuterXml ();
					break;
				}
			} while (!reader.EOF && reader.NodeType == XmlNodeType.Element);
		} catch (XmlException ex) {
			Logger.Log.Debug ("Invalid feed file: " + ex.Message);
			is_valid_file = false;
			// Release the file handle; nothing more will be read from it.
			if (reader != null)
				reader.Close ();
		}
	}

	// Advances to the next element of the feed.  Returns false once the feed
	// is exhausted (or was never readable); at that point the reader is
	// closed and the file size is cached for later IsUpToDate () checks.
	public bool HasNextIndexable ()
	{
		// Forget the previous item so that "nothing read" is detectable below.
		current_item = null;
		if (!is_valid_file || reader == null)
			return false;
		string itemString = "";
		try {
			// check if the reader is at the startnode
			if (reader.NodeType == XmlNodeType.Element) {
				itemString = reader.ReadOuterXml ();
				// form node object from the <node>...</node> string
				// FIXME Deserialize is expensive - remove it altogether
				current_item = (Item) serializer.Deserialize (new StringReader (itemString));
			}
		} catch (XmlException) {
			// probably no more <item>
		}

		if (current_item == null) {
			//Logger.Log.Debug ("AkregatorQ: Probably no more feeds left in " + feed_file);
			//Logger.Log.Debug ("Causing string = " + itemString);

			is_valid_file = false;
			reader.Close ();
			StoreFileSize ();
		}

		return is_valid_file;
	}

	private void StoreFileSize ()
	{
		// cache the file size
		FileInfo file = new FileInfo (feed_file);
		queryable.SetFileSize (feed_file, file.Length);
	}

	// Returns the Indexable for the current item, or null when there is no
	// current item or the item is flagged as deleted.
	public Indexable GetNextIndexable ()
	{
		// Both conditions must hold.  With "||" a null current_item was
		// dereferenced and deleted items were indexed anyway.
		if (current_item != null && !current_item.IsDeleted)
			return current_itemToIndexable ();
		else
			return null;
	}

	// Builds the Indexable for current_item; returns null if there is none.
	private Indexable current_itemToIndexable ()
	{
		if (current_item == null)
			return null;

		Logger.Log.Debug ("Indexing " + channel_link + ":" + current_item.Link);
		Indexable indexable = new Indexable (new Uri (String.Format ("feed:{0};item={1}", channel_link, current_item.Link)));
		indexable.ParentUri = UriFu.PathToFileUri (feed_file);
		indexable.MimeType = "text/html";
		indexable.HitType = "FeedItem";

		string RFC822 = "ddd, dd MMM yyyy HH:mm:ss zzz";
		DateTime date = DateTime.ParseExact(current_item.PubDate, RFC822, DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AdjustToUniversal);
		indexable.Timestamp = date;

		// replace property names with Dublin Core names
		indexable.AddProperty (Property.New ("dc:title", current_item.Title));
		indexable.AddProperty (Property.NewDate ("dc:date", date));
		indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Link));
		indexable.AddProperty (Property.NewKeyword ("dc:source", channel_link));
		indexable.AddProperty (Property.New ("dc:publisher", channel_title));

		// The item description is the text that gets indexed.
		StringReader reader = new StringReader (current_item.Description);
		indexable.SetTextReader (reader);

		return indexable;
	}
}
// One <meta> child of a feed item: a "type" attribute plus a text value.
// Item.IsDeleted looks for type "deleted" with value "true".
public class MetaInfo {

	// Text content of the element.
	// NOTE(review): [XmlText] is reconstructed from context (IsDeleted
	// compares this field with the element text) - confirm against the
	// original file.
	[XmlText]
	public string value = "";

	// Value of the "type" attribute.
	[XmlAttribute ("type")]
	public string Type = "";
}
// We deserialize XML fragments, so there won't be any <?xml ... ?> declaration
398 [System
.Xml
.Serialization
.XmlRoot("item", Namespace
="", IsNullable
=false)]
399 [System
.Xml
.Serialization
.XmlType("item", Namespace
="")]
401 [XmlElement ("pubDate")] public string PubDate
;
402 [XmlElement ("title")] public string Title
= "";
403 [XmlElement ("description")] public string Description
="";
404 [XmlElement ("link")] public string Link
="";
// All <meta> children (namespace "http://foobar") of the item, stored as
// MetaInfo objects.
[XmlElement ("meta", typeof (MetaInfo), Namespace="http://foobar")]
public ArrayList MetaList {
	get {
		return metaList;
	}
	set {
		metaList = value;
	}
}

// Backing store for MetaList; starts out empty rather than null.
private ArrayList metaList = new ArrayList ();
412 public bool IsDeleted
{
414 for (int i
=0; i
<metaList
.Count
; ++i
) {
415 MetaInfo meta
= (MetaInfo
)metaList
[i
];
416 if (meta
.Type
== "deleted" && meta
.value == "true") {