2 // AkregatorQueryable.cs
4 // Copyright (C) 2005 Debajyoti Bera
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Threading
;
32 using System
.Xml
.Serialization
;
33 using System
.Globalization
;
38 namespace Beagle
.Daemon
.AkregatorQueryable
{
40 [QueryableFlavor (Name
="Akregator", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
41 public class AkregatorQueryable
: LuceneFileQueryable
{
43 private static Logger log
= Logger
.Get ("AkregatorQueryable");
47 // construct a serializer and keep it handy for indexablegenerator to use
// Backing field for the Serializer property; starts out null.
48 private XmlSerializer serializer
= null;
// Shared XmlSerializer for the Item type. The visible code creates it the
// first time the backing field is found to be null, so later reads reuse
// the same instance.
// NOTE(review): the accessor keyword and the return statement are not
// visible in this excerpt.
49 public XmlSerializer Serializer
{
51 if (serializer
== null)
52 serializer
= new XmlSerializer (typeof (Item
));
57 // store the file size indexed by the filenames
58 // akregator unnecessarily saves files
59 private Hashtable file_sizes
;
// Looks up the size recorded for the file called `name` in file_sizes.
// When the table holds no entry for `name` the first branch is taken.
// NOTE(review): the statement in that branch is not visible in this
// excerpt — confirm which value signals "size unknown".
// Otherwise the stored value is cast back to long and returned.
60 public long GetFileSize (string name
)
62 if (! file_sizes
.Contains (name
))
64 return (long)file_sizes
[name
];
// Records `size` as the known size of the file called `name`, replacing
// any value stored for that name earlier.
public void SetFileSize (string name, long size)
{
	this.file_sizes [name] = size;
}
71 // add versioning of index
72 // v1: change property names to DC names,
73 // store feed_file as ParentUri
74 // v2: remove dc:date, use Timestamp property.
75 private const int INDEX_VERSION
= 2;
// Creates the backend on top of the "AkregatorIndex" index (at
// INDEX_VERSION), works out where the Akregator archive lives
// (<home>/.kde/share/apps/akregator/Archive) and creates the table
// used to remember file sizes.
public AkregatorQueryable () : base ("AkregatorIndex", INDEX_VERSION)
{
	// Append the archive location to the home directory one path
	// component at a time.
	string archive_path = PathFinder.HomeDir;
	foreach (string component in new string [] { ".kde", "share", "apps", "akregator", "Archive" })
		archive_path = Path.Combine (archive_path, component);
	akregator_dir = archive_path;

	file_sizes = new Hashtable ();
}
88 /////////////////////////////////////////////////
// Start hook of this backend (overrides the base class method).
// The visible statement wraps StartWorker in a ThreadStart and hands it to
// ExceptionHandlingThread.Start.
// NOTE(review): presumably this keeps the directory scan off the caller's
// thread — confirm. One line of the body is not visible in this excerpt.
90 public override void Start ()
94 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker handed to ExceptionHandlingThread by Start. It checks that the
// Akregator archive directory exists, sets up change notification for it
// and then queues every .xml file found there for indexing.
// NOTE(review): several lines of this method (returns, branch keywords,
// the handling of `count`) are not visible in this excerpt.
97 private void StartWorker ()
// Archive directory missing: register CheckForExistence as a GLib timeout
// handler with an interval argument of 60000.
99 if (!Directory
.Exists (akregator_dir
)) {
100 GLib
.Timeout
.Add (60000, new GLib
.TimeoutHandler (CheckForExistence
));
// With inotify available, subscribe to close-after-write and delete
// events on the archive directory.
104 if (Inotify
.Enabled
) {
105 Inotify
.EventType mask
= Inotify
.EventType
.CloseWrite
106 | Inotify
.EventType
.Delete
;
108 Inotify
.Subscribe (akregator_dir
, OnInotifyEvent
, mask
);
// FileSystemWatcher on the same directory, reporting changed and created
// files to OnChanged.
// NOTE(review): presumably the fallback when inotify is disabled — the
// branch keyword is not visible here.
110 FileSystemWatcher fsw
= new FileSystemWatcher ();
111 fsw
.Path
= akregator_dir
;
113 fsw
.Changed
+= new FileSystemEventHandler (OnChanged
);
114 fsw
.Created
+= new FileSystemEventHandler (OnChanged
);
116 fsw
.EnableRaisingEvents
= true;
119 log
.Info ("Scanning Akregator feeds...");
// The backend reports itself as crawling for the duration of the scan.
121 State
= QueryableState
.Crawling
;
122 Stopwatch stopwatch
= new Stopwatch ();
125 DirectoryInfo dir
= new DirectoryInfo (akregator_dir
);
// Queue every .xml file in the directory; `true` marks the request as part
// of the initial scan.
127 foreach (FileInfo file
in DirectoryWalker
.GetFileInfos (dir
)) {
128 if (file
.Extension
== ".xml") {
129 IndexSingleFeed (file
.FullName
, true);
// Scan finished: back to idle, then log the file count and the stopwatch.
134 State
= QueryableState
.Idle
;
136 log
.Info ("{0} files will be parsed (scanned in {1})", count
, stopwatch
);
// GLib timeout handler registered by StartWorker when the archive directory
// is missing; it re-tests whether akregator_dir exists.
// NOTE(review): the statements following the test, including the returned
// values, are not visible in this excerpt.
139 private bool CheckForExistence ()
141 if (!Directory
.Exists (akregator_dir
))
149 /////////////////////////////////////////////////
151 // Modified/Created event using Inotify
// Inotify callback for the archive directory.
// Events whose `subitem` is empty or does not end in ".xml" hit the first
// test. A CloseWrite event queues <path>/<subitem> for indexing as a
// non-initial scan; otherwise a Delete event schedules removal of that file.
// NOTE(review): the declarations of `path` and `subitem` and the statement
// guarded by the first test are not visible in this excerpt.
153 private void OnInotifyEvent (Inotify
.Watch watch
,
157 Inotify
.EventType type
)
159 if (subitem
== "" || !subitem
.EndsWith (".xml"))
162 if ((type
& Inotify
.EventType
.CloseWrite
) != 0)
163 IndexSingleFeed (Path
.Combine (path
, subitem
), false);
164 else if ((type
& Inotify
.EventType
.Delete
) != 0)
165 RemoveFeedFile (Path
.Combine (path
, subitem
));
168 // Modified/Created event using FSW
// FileSystemWatcher callback for changed and created files: queues the
// reported file for indexing as a non-initial scan.
private void OnChanged (object o, FileSystemEventArgs args)
{
	string changed_file = args.FullPath;
	IndexSingleFeed (changed_file, false);
}
175 /////////////////////////////////////////////////
177 // Parse and index a single feed
// Queues one feed file for parsing and indexing.
//   filename     - path of the feed file; names not ending in ".xml" hit
//                  the first test
//   initial_scan - passed through to the FeedIndexableGenerator
// A file the scheduler already tracks by tag (ContainsByTag) is only
// logged. Otherwise a FeedIndexableGenerator is wrapped in an add task and
// given to the scheduler.
// NOTE(review): the statements guarded by the two tests and the
// declaration of `task` are not visible in this excerpt.
179 private void IndexSingleFeed (string filename
, bool initial_scan
) {
180 if (! filename
.EndsWith (".xml"))
182 if (ThisScheduler
.ContainsByTag (filename
)) {
183 Logger
.Log
.Debug ("Not adding task for already running task: {0}", filename
);
187 FeedIndexableGenerator generator
= new FeedIndexableGenerator (this, filename
, initial_scan
);
189 task
= NewAddTask (generator
);
191 ThisScheduler
.Add (task
);
// Schedules removal of a feed file from the index. The remove task is
// addressed by the file's URI and queued with Immediate priority and
// sub-priority 0.
private void RemoveFeedFile (string file)
{
	Logger.Log.Debug ("Removing Akregator feedfile:" + file);

	Scheduler.Task removal = NewRemoveTask (UriFu.PathToFileUri (file));
	removal.Priority = Scheduler.Priority.Immediate;
	removal.SubPriority = 0;

	ThisScheduler.Add (removal);
}
206 * Indexable generator for Akregator Feeds
208 public class FeedIndexableGenerator
: IIndexableGenerator
{
209 private string feed_file
;
210 private AkregatorQueryable queryable
;
212 private XmlTextReader reader
;
213 private bool is_valid_file
= true;
214 private bool initial_scan
= false;
216 private string channel_title
;
217 private string channel_link
;
218 private string channel_description
;
220 private Item current_item
;
221 private XmlSerializer serializer
;
// Creates a generator for one feed file.
//   queryable    - owning backend; also the source of the shared
//                  XmlSerializer (queryable.Serializer)
//   feed_file    - path of the feed file to read
//   initial_scan - stored in the field of the same name
// NOTE(review): any statements after these assignments are not visible in
// this excerpt.
223 public FeedIndexableGenerator (AkregatorQueryable queryable
, string feed_file
, bool initial_scan
)
225 this.queryable
= queryable
;
226 this.feed_file
= feed_file
;
227 this.serializer
= queryable
.Serializer
;
228 this.initial_scan
= initial_scan
;
// Hook with no parameters and no return value.
// NOTE(review): presumably required by IIndexableGenerator and called after
// a flush — confirm; the body is not visible in this excerpt.
232 public void PostFlushHook ()
// Status label of this generator: the path of the feed file it reads.
public string StatusName {
	get {
		return feed_file;
	}
}
// Decides whether the feed file at `path` can be skipped. Two checks are
// visible: the backend's FileAttributesStore, and a comparison of the
// file's current length with the size remembered by the backend.
// NOTE(review): the statements guarded by each test (the returned values)
// are not visible in this excerpt.
240 private bool IsUpToDate (string path
)
242 // first check the file date
243 if (queryable
.FileAttributesStore
.IsUpToDate (path
))
245 // if not up to date and initial scan, then we should index
248 // next check the size - it would be really unlucky for the file to change
249 // and yet keep the same size
250 // FIXME: Maybe store the md5-hash of the file - that is less expensive
251 // than indexing all the feeds in the file!
252 FileInfo file
= new FileInfo (path
);
253 if (queryable
.GetFileSize (path
) != file
.Length
)
// Opens the feed file and reads the channel-level header: the <title>,
// <link> and <description> elements under <rss><channel>. A file that
// IsUpToDate reports as current is marked invalid instead of being opened.
// An XmlException while reading is logged and also marks the file invalid.
// NOTE(review): the try/do keywords, case labels, breaks and returns of
// this method are not visible in this excerpt.
258 private void ReadFeedHeader () {
260 if (IsUpToDate (feed_file
)) {
261 is_valid_file
= false;
265 Logger
.Log
.Debug ("Opening feed file: {0}", feed_file
);
266 reader
= new XmlTextReader (feed_file
);
267 reader
.WhitespaceHandling
= WhitespaceHandling
.None
;
269 is_valid_file
= true;
271 // move to beginning of document
272 reader
.MoveToContent();
273 // move to <rss ...> node
274 reader
.ReadStartElement ("rss");
275 // move to <channel> node
276 reader
.ReadStartElement ("channel");
// Look at the element under the reader; reaching an <item> is tested
// separately before the switch on the element name.
281 string elementName
= reader
.Name
;
282 if (elementName
== "item")
284 switch (elementName
) {
286 reader
.ReadStartElement ("title");
287 channel_title
= reader
.ReadString ();
288 reader
.ReadEndElement ();
292 reader
.ReadStartElement ("link");
293 channel_link
= reader
.ReadString ();
294 reader
.ReadEndElement ();
298 reader
.ReadStartElement ("description");
299 channel_description
= reader
.ReadString ();
300 reader
.ReadEndElement ();
303 // ignore other elements
305 reader
.ReadOuterXml ();
// Keep going while the reader is not at end of file and sits on an element.
308 } while (!reader
.EOF
&& reader
.NodeType
== XmlNodeType
.Element
);
309 } catch (XmlException ex
) {
310 Logger
.Log
.Warn (ex
, "Caught exception parsing feed file:");
311 is_valid_file
= false;
// Tries to read the next item from the feed and reports whether the file
// is still considered valid. When the reader sits on an element, its outer
// XML is read into a string and deserialized into current_item. If
// current_item is null afterwards the file is marked as no longer valid.
// NOTE(review): the try keyword, the statement guarded by the first test
// and the body of the catch block are not visible in this excerpt.
316 public bool HasNextIndexable ()
319 if (!is_valid_file
|| reader
== null)
321 string itemString
= "";
323 // check if the reader is at the startnode
324 if (reader
.NodeType
== XmlNodeType
.Element
) {
325 itemString
= reader
.ReadOuterXml ();
326 // form node object from the <node>...</node> string
327 // FIXME Deserialize is expensive - remove it altogether
328 current_item
= (Item
) serializer
.Deserialize (new StringReader (itemString
));
330 } catch (XmlException ex
) {
331 // probably no more <item>
334 if (current_item
== null) {
335 //Logger.Log.Debug ("AkregatorQ: Probably no more feeds left in " + feed_file);
336 //Logger.Log.Debug ("Causing string = " + itemString);
338 is_valid_file
= false;
343 return is_valid_file
;
// Stores the feed file's current length in the owning backend, under the
// feed file's path.
private void StoreFileSize ()
{
	long current_length = new FileInfo (feed_file).Length;
	queryable.SetFileSize (feed_file, current_length);
}
// Returns the Indexable for the item most recently deserialized into
// current_item, or null when there is no such item or the item is flagged
// as deleted.
public Indexable GetNextIndexable ()
{
	// Both conditions must hold. The previous `||` dereferenced
	// current_item when it was null (NullReferenceException) and never
	// filtered out deleted items, because the second operand was only
	// evaluated for a null item.
	if (current_item != null && !current_item.IsDeleted)
		return current_itemToIndexable ();

	// NOTE(review): the original fall-through statement is not visible in
	// this excerpt; null is assumed to mean "nothing to index for this
	// item" — confirm against the caller.
	return null;
}
// Builds the Indexable for current_item:
//   - URI "feed:<channel_link>;item=<item link>", parent URI = the feed file
//   - MIME type "text/html", hit type "FeedItem"
//   - timestamp parsed from the item's pubDate using the RFC822 pattern
//     below and adjusted to universal time
//   - Dublin Core properties for title, identifier, source and publisher
//   - the item description as the text content
// NOTE(review): the statement guarded by the null test and the final
// return are not visible in this excerpt.
361 private Indexable
current_itemToIndexable ()
364 if (current_item
== null)
367 //Logger.Log.Debug ("Indexing " + channel_link + ":" + current_item.Link);
368 Indexable indexable
= new Indexable (new Uri (String
.Format ("feed:{0};item={1}", channel_link
, current_item
.Link
)));
369 indexable
.ParentUri
= UriFu
.PathToFileUri (feed_file
);
370 indexable
.MimeType
= "text/html";
371 indexable
.HitType
= "FeedItem";
// NOTE(review): ParseExact accepts only this exact pattern; a pubDate that
// is missing or formatted differently is expected to throw — confirm that
// the caller copes with that.
373 string RFC822
= "ddd, dd MMM yyyy HH:mm:ss zzz";
374 DateTime date
= DateTime
.ParseExact(current_item
.PubDate
, RFC822
, DateTimeFormatInfo
.InvariantInfo
, DateTimeStyles
.AdjustToUniversal
);
375 indexable
.Timestamp
= date
;
377 // replace property names with Dublin Core names
378 indexable
.AddProperty (Property
.New ("dc:title", current_item
.Title
));
379 indexable
.AddProperty (Property
.NewKeyword ("dc:identifier", current_item
.Link
));
380 indexable
.AddProperty (Property
.NewKeyword ("dc:source", channel_link
));
381 indexable
.AddProperty (Property
.New ("dc:publisher", channel_title
));
// The item description becomes the indexable's text content.
383 StringReader reader
= new StringReader (current_item
.Description
);
384 indexable
.SetTextReader (reader
);
// Deserialization target for a feed item's <meta> element (see
// Item.MetaList): `Type` is mapped to the element's "type" attribute and
// `value` holds a string that defaults to "".
// NOTE(review): one line between the class header and the `value` field is
// not visible in this excerpt — presumably an attribute mapping `value` to
// the element text; confirm.
391 public class MetaInfo
{
393 public string value = "";
394 [XmlAttribute ("type")] public string Type
= "";
397 // we deserialize bare XML fragments, so there won't be any <?xml ... ?> declaration
398 [System
.Xml
.Serialization
.XmlRoot("item", Namespace
="", IsNullable
=false)]
399 [System
.Xml
.Serialization
.XmlType("item", Namespace
="")]
401 [XmlElement ("pubDate")] public string PubDate
;
402 [XmlElement ("title")] public string Title
= "";
403 [XmlElement ("description")] public string Description
="";
404 [XmlElement ("link")] public string Link
="";
405 [XmlElement ("meta", typeof (MetaInfo
), Namespace
="http://foobar")]
406 public ArrayList MetaList
{
407 get { return metaList; }
408 set { metaList = value; }
410 private ArrayList metaList
= new ArrayList ();
412 public bool IsDeleted
{
414 for (int i
=0; i
<metaList
.Count
; ++i
) {
415 MetaInfo meta
= (MetaInfo
)metaList
[i
];
416 if (meta
.Type
== "deleted" && meta
.value == "true") {