Fix an fd leak in knotes backend (stupid me) and some cleanup in kaddrbook backend.
[beagle.git] / beagled / LifereaQueryable / LifereaQueryable.cs
blobdb78d57b93b70c2bbb1e9bbdcf6c230f7deff3dd
1 //
2 // LifereaQueryable.cs
3 //
4 // Copyright (C) 2005 Carl-Emil Lagerstedt
5 // Copyright (C) 2005 Novell, Inc.
6 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Collections;
30 using System.Threading;
32 using System.Xml;
33 using System.Xml.Serialization;
35 using Beagle.Daemon;
36 using Beagle.Util;
38 namespace Beagle.Daemon.LifereaQueryable {
// Queryable backend that indexes the feed cache files Liferea keeps under
// ~/.liferea/cache/feeds. Feed files are picked up by an initial directory
// scan and afterwards through Inotify or, when Inotify is not enabled,
// through a FileSystemWatcher.
[QueryableFlavor (Name="Liferea", Domain=QueryDomain.Local, RequireInotify=false)]
public class LifereaQueryable : LuceneFileQueryable {

	private static Logger log = Logger.Get ("LifereaQueryable");

	// Directory holding the cached feed files (~/.liferea/cache/feeds).
	string liferea_dir;
	// Directory holding the feed favicons (~/.liferea/cache/favicons).
	internal string icon_dir;

	// Serializer for <item> elements; created on first use and then shared
	// with every FeedIndexableGenerator built for this queryable.
	private XmlSerializer serializer = null;
	public XmlSerializer Serializer {
		get {
			if (serializer == null)
				serializer = new XmlSerializer (typeof (Item));
			return serializer;
		}
	}

	// add versioning info
	// v1: change property names to match DC element names
	// v2: remove dc:date, use Timestamp property.
	private const int INDEX_VERSION = 2;

	// Computes the feed and favicon directories below the user's home.
	public LifereaQueryable () : base ("LifereaIndex", INDEX_VERSION)
	{
		string cache_dir = Path.Combine (PathFinder.HomeDir, ".liferea");
		cache_dir = Path.Combine (cache_dir, "cache");

		icon_dir = Path.Combine (cache_dir, "favicons");
		liferea_dir = Path.Combine (cache_dir, "feeds");
	}

	/////////////////////////////////////////////////

	// Starts the base queryable, then runs the watch setup and the initial
	// crawl on a separate thread.
	public override void Start ()
	{
		base.Start ();

		ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
	}

	// Sets up change notification for the feed directory and schedules an
	// indexing task for every feed file currently in it. If the directory
	// does not exist yet, a GLib timeout is installed that keeps calling
	// CheckForExistence instead.
	private void StartWorker ()
	{
		if (!Directory.Exists (liferea_dir)) {
			GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
			return;
		}

		if (Inotify.Enabled) {
			Inotify.EventType mask = Inotify.EventType.CloseWrite
				| Inotify.EventType.Delete;

			Inotify.Subscribe (liferea_dir, OnInotifyEvent, mask);
		} else {
			FileSystemWatcher fsw = new FileSystemWatcher ();
			fsw.Path = liferea_dir;

			fsw.Changed += new FileSystemEventHandler (OnChanged);
			fsw.Created += new FileSystemEventHandler (OnChanged);

			fsw.EnableRaisingEvents = true;
		}

		log.Info ("Scanning Liferea feeds...");

		State = QueryableState.Crawling;
		Stopwatch stopwatch = new Stopwatch ();
		stopwatch.Start ();

		DirectoryInfo dir = new DirectoryInfo (liferea_dir);
		int count = 0;
		foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir)) {
			IndexSingleFeed (file.FullName);
			// Count every file handed to IndexSingleFeed so the summary
			// below reports the real number instead of always 0.
			++count;
		}

		State = QueryableState.Idle;
		stopwatch.Stop ();
		log.Info ("{0} files will be parsed (scanned in {1})", count, stopwatch);
	}

	// GLib timeout callback. Returns true (keep the timeout) while the feed
	// directory is still missing; once it exists, calls Start () and
	// returns false so the timeout is removed.
	private bool CheckForExistence ()
	{
		if (!Directory.Exists (liferea_dir))
			return true;

		this.Start ();

		return false;
	}

	/////////////////////////////////////////////////

	// Modified/Created event using Inotify

	private void OnInotifyEvent (Inotify.Watch watch,
				     string path,
				     string subitem,
				     string srcpath,
				     Inotify.EventType type)
	{
		// someone reported that backup files with abcd~
		// were being generated
		if (subitem == "" || subitem.EndsWith ("~"))
			return;

		if ((type & Inotify.EventType.CloseWrite) != 0)
			IndexSingleFeed (Path.Combine (path, subitem));
		else if ((type & Inotify.EventType.Delete) != 0)
			Removefeed_file (Path.Combine (path, subitem));
	}

	// Modified/Created event using FSW

	private void OnChanged (object o, FileSystemEventArgs args)
	{
		IndexSingleFeed (args.FullPath);
	}

	/////////////////////////////////////////////////

	// Schedules an add task for one feed file. The task is tagged with the
	// file name; if the scheduler already holds a task with that tag,
	// nothing is added.
	private void IndexSingleFeed (string filename) {
		if (ThisScheduler.ContainsByTag (filename)) {
			Logger.Log.Debug ("Not adding task for already running task: {0}", filename);
			return;
		}

		FeedIndexableGenerator generator = new FeedIndexableGenerator (this, filename);
		Scheduler.Task task;
		task = NewAddTask (generator);
		task.Tag = filename;
		task.Source = this;
		ThisScheduler.Add (task);
	}

	// Schedules an immediate-priority remove task for the file URI of a
	// deleted feed file.
	private void Removefeed_file (string file) {
		Logger.Log.Debug ("Removing Liferea feed_file:" + file);
		Uri uri = UriFu.PathToFileUri (file);
		Scheduler.Task task = NewRemoveTask (uri);
		task.Priority = Scheduler.Priority.Immediate;
		task.SubPriority = 0;
		ThisScheduler.Add (task);
	}
}
/**
 * Indexable generator for Liferea Feeds
 *
 * Reads one cached feed file: the constructor parses the feed header
 * (feedSource, feedTitle) and leaves the reader on the first element that
 * follows it; each HasNextIndexable () call then deserializes one element
 * into an Item, which GetNextIndexable () converts to an Indexable.
 */
public class FeedIndexableGenerator : IIndexableGenerator {
	private string feed_file;
	private string icon_file = null;
	private LifereaQueryable queryable;

	private XmlTextReader reader;
	private bool is_valid_file = true;

	private string feed_source = "";
	private string publisher = "";
	private Item current_item;
	private XmlSerializer serializer;

	// queryable: owner, used for the shared serializer, the icon directory
	//            and the file attributes store.
	// feed_file: path of the feed cache file to read.
	public FeedIndexableGenerator (LifereaQueryable queryable, string feed_file)
	{
		this.queryable = queryable;
		this.feed_file = feed_file;
		this.serializer = queryable.Serializer;
		ReadFeedHeader ();

		// Set icon file: <icon_dir>/<feed file name without extension>.png
		string file_name = Path.GetFileNameWithoutExtension (feed_file);
		this.icon_file = this.queryable.icon_dir;
		this.icon_file = Path.Combine (icon_file, file_name);
		this.icon_file = Path.ChangeExtension (icon_file, "png");
	}

	public void PostFlushHook ()
	{
		current_item = null;
		//queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
	}

	public string StatusName {
		get { return feed_file; }
	}

	private bool IsUpToDate (string path)
	{
		return queryable.FileAttributesStore.IsUpToDate (path);
	}

	// Marks the generator as exhausted and releases the reader, if one was
	// opened. Safe to call when the reader was never created.
	private void InvalidateAndClose ()
	{
		is_valid_file = false;
		if (reader != null) {
			reader.Close ();
			reader = null;
		}
	}

	// Opens the feed file and consumes the header elements that precede the
	// first <item>, remembering feedSource and feedTitle. Files that the
	// attributes store reports as up to date are not opened at all. Parse
	// and I/O failures are logged and leave the generator invalid, so it
	// yields no indexables, with the reader closed.
	private void ReadFeedHeader () {

		if (IsUpToDate (feed_file)) {
			is_valid_file = false;
			return;
		}

		try {
			Logger.Log.Debug ("Opening liferea feed file: {0}", feed_file);
			reader = new XmlTextReader (feed_file);
			reader.WhitespaceHandling = WhitespaceHandling.None;

			is_valid_file = true;

			// move to beginning of document
			reader.MoveToContent();
			// move to <feed> node
			reader.ReadStartElement ("feed");

			do {
				string elementName = reader.Name;
				if (elementName == "item")
					break;
				switch (elementName) {
				case "feedSource":
					reader.ReadStartElement ("feedSource");
					feed_source = reader.ReadString ();
					reader.ReadEndElement ();
					break;
				case "feedTitle":
					reader.ReadStartElement ("feedTitle");
					publisher = reader.ReadString ();
					reader.ReadEndElement ();
					break;
				// ignore other elements
				default:
					reader.ReadOuterXml ();
					break;
				}
			} while (!reader.EOF && reader.NodeType == XmlNodeType.Element);
		} catch (XmlException ex) {
			Logger.Log.Warn (ex, "Caught exception parsing feed file:");
			InvalidateAndClose ();
		} catch (IOException ex) {
			// The file may have vanished or become unreadable between the
			// change notification and this read; do not leak the reader.
			Logger.Log.Warn (ex, "Caught exception reading feed file:");
			InvalidateAndClose ();
		}
	}

	// Tries to read the next element into current_item. Returns true when
	// an item is available for GetNextIndexable (); otherwise closes the
	// reader and returns false for this and every later call.
	public bool HasNextIndexable ()
	{
		current_item = null;
		if (!is_valid_file || reader == null)
			return false;

		try {
			// check if the reader is at the startnode
			if (reader.NodeType == XmlNodeType.Element) {
				string itemString = reader.ReadOuterXml ();
				// form node object from the <node>...</node> string
				// FIXME Deserialize(...) is expensive - remove it altogether
				current_item = (Item) serializer.Deserialize (new StringReader (itemString));
			}
		} catch (XmlException) {
			// probably no more <item>
		} catch (InvalidOperationException) {
			// XmlSerializer.Deserialize reports malformed or unexpected
			// XML as InvalidOperationException; treat it as end of items
			// so the reader below still gets closed.
		}

		if (current_item == null) {
			//Logger.Log.Debug ("LifereaQ: Probably no more feeds left in " + feed_file);
			InvalidateAndClose ();
		}

		return is_valid_file;
	}

	// Returns the Indexable for the item read by the last successful
	// HasNextIndexable () call, or null when there is none.
	public Indexable GetNextIndexable ()
	{
		if (current_item != null)
			return current_itemToIndexable ();
		else
			return null;
	}

	// Builds the Indexable for current_item: URI, parent URI, timestamp,
	// Dublin Core style properties, optional cached icon, and the item
	// description as the text to index.
	private Indexable current_itemToIndexable ()
	{
		Indexable indexable;
		try {
			indexable = new Indexable (new Uri (String.Format ("{0};item={1}", feed_source, current_item.Source)));
		} catch (System.UriFormatException) {
			// feed_source did not form a valid URI; fall back to a dummy scheme
			indexable = new Indexable (new Uri (String.Format ("liferea://dummy?{0};item={1}", feed_source, current_item.Source)));
		}
		indexable.ParentUri = UriFu.PathToFileUri (feed_file);
		indexable.MimeType = "text/html";
		indexable.HitType = "FeedItem";

		// Timestamp is taken as a number of seconds counted from 1970-01-01
		DateTime date = new DateTime (1970, 1, 1);
		date = date.AddSeconds (current_item.Timestamp);
		indexable.Timestamp = date;

		// cleaning up the property names as far as possible
		// this way querying for specific field is possible
		// following DC element names wherever applicable

		indexable.AddProperty (Property.New ("dc:title", current_item.Title));

		// Attribs has no initializer on Item, so it is null for an item
		// that carries no <attributes> element.
		Attribute[] attribs = null;
		if (current_item.Attribs != null)
			attribs = current_item.Attribs.AttribArray;
		if (attribs != null) {
			foreach (Attribute attrib in attribs) {
				if (attrib.Name != "author")
					continue;
				indexable.AddProperty (Property.New ("dc:creator", attrib.Value));
			}
		}
		indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Source));
		indexable.AddProperty (Property.NewKeyword ("dc:source", feed_source));
		indexable.AddProperty (Property.New ("dc:publisher", publisher));

		if (File.Exists (icon_file))
			indexable.AddProperty (Property.NewUnsearched ("fixme:cachedimg", icon_file));

		StringReader reader = new StringReader (current_item.Description);
		indexable.SetTextReader (reader);

		return indexable;
	}
}
// One <item> element of a Liferea feed cache file, in the shape
// XmlSerializer maps it to. FeedIndexableGenerator deserializes these and
// reads every field below.
[XmlRoot ("item", Namespace="", IsNullable=false)]
[XmlType ("item", Namespace="")]
public class Item {
	// <title> child element; defaults to the empty string.
	[XmlElement ("title")]
	public string Title = "";

	// <description> child element; defaults to the empty string.
	[XmlElement ("description")]
	public string Description = "";

	// <source> child element; defaults to the empty string.
	[XmlElement ("source")]
	public string Source = "";

	// <attributes> child element; no initializer, so it may be null.
	[XmlElement ("attributes")]
	public Attributes Attribs;

	// <time> child element; FeedIndexableGenerator adds it as seconds to
	// 1970-01-01.
	[XmlElement ("time")]
	public long Timestamp;
}
// Container mapped to the <attributes> element of an item: the repeated
// <attribute> children are collected into AttribArray.
public class Attributes {
	// All <attribute> children; no initializer, so it may be null.
	[XmlElement ("attribute")]
	public Attribute[] AttribArray;
}
// A single <attribute name="...">text</attribute> entry of an item.
// NOTE(review): this class shares its simple name with System.Attribute.
public class Attribute {
	// Value of the "name" XML attribute; defaults to the empty string.
	[XmlAttribute ("name")]
	public string Name = "";

	// Text content of the element; defaults to the empty string.
	[XmlTextAttribute]
	public string Value = "";
}