Remove some debug spew
[beagle.git] / beagled / LifereaQueryable / LifereaQueryable.cs
blob20fc8f62f463349b4fd59f4669627625d18727a5
1 //
2 // LifereaQueryable.cs
3 //
4 // Copyright (C) 2005 Carl-Emil Lagerstedt
5 // Copyright (C) 2005 Novell, Inc.
6 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Collections;
30 using System.Threading;
32 using System.Xml;
33 using System.Xml.Serialization;
35 using Beagle.Daemon;
36 using Beagle.Util;
38 namespace Beagle.Daemon.LifereaQueryable {
// Queryable backend for the feed cache files Liferea keeps in
// <home>/.liferea/cache/feeds.  Every cache file is wrapped in a
// FeedIndexableGenerator and scheduled as an add task.  Changes are
// picked up through inotify when it is enabled, and through a
// FileSystemWatcher otherwise.
[QueryableFlavor (Name="Liferea", Domain=QueryDomain.Local, RequireInotify=false)]
public class LifereaQueryable : LuceneFileQueryable {

	private static Logger log = Logger.Get ("LifereaQueryable");

	// Directory holding the Liferea feed cache files.
	string liferea_dir;

	// Only set when inotify is not available.  Kept in a field (rather
	// than a local) so the watcher stays referenced for as long as this
	// queryable does.
	private FileSystemWatcher fsw = null;

	// Lazily created serializer for <item> elements, shared by all the
	// generators created by this queryable.
	private XmlSerializer serializer = null;
	public XmlSerializer Serializer {
		get {
			if (serializer == null)
				serializer = new XmlSerializer (typeof (Item));
			return serializer;
		}
	}

	// add versioning info
	// v1: change property names to match DC element names
	// v2: remove dc:date, use Timestamp property.
	private const int INDEX_VERSION = 2;

	public LifereaQueryable () : base ("LifereaIndex", INDEX_VERSION)
	{
		liferea_dir = Path.Combine (PathFinder.HomeDir, ".liferea");
		liferea_dir = Path.Combine (liferea_dir, "cache");
		liferea_dir = Path.Combine (liferea_dir, "feeds");
	}

	/////////////////////////////////////////////////

	// Starts the base queryable and runs the watch setup and initial
	// crawl on a separate thread.
	public override void Start ()
	{
		base.Start ();

		ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
	}

	// Sets up change notification for the feed directory and schedules
	// every feed file already present.  If the directory does not exist
	// yet, a timeout is installed that re-checks for it instead.
	private void StartWorker ()
	{
		if (!Directory.Exists (liferea_dir)) {
			GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
			return;
		}

		if (Inotify.Enabled) {
			Inotify.EventType mask = Inotify.EventType.CloseWrite
				| Inotify.EventType.Delete;

			Inotify.Subscribe (liferea_dir, OnInotifyEvent, mask);
		} else {
			fsw = new FileSystemWatcher ();
			fsw.Path = liferea_dir;

			fsw.Changed += new FileSystemEventHandler (OnChanged);
			fsw.Created += new FileSystemEventHandler (OnChanged);

			fsw.EnableRaisingEvents = true;
		}

		log.Info ("Scanning Liferea feeds...");

		State = QueryableState.Crawling;
		Stopwatch stopwatch = new Stopwatch ();
		stopwatch.Start ();

		DirectoryInfo dir = new DirectoryInfo (liferea_dir);
		int count = 0;
		foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir)) {
			// Same filter as the change handlers: skip editor backups.
			if (IsBackupName (file.Name))
				continue;

			IndexSingleFeed (file.FullName);
			// Count what was actually scheduled so the summary below
			// reports a real number instead of always 0.
			++count;
		}

		State = QueryableState.Idle;
		stopwatch.Stop ();
		log.Info ("{0} files will be parsed (scanned in {1})", count, stopwatch);
	}

	// Timeout callback: returns true (keep polling) while the feed
	// directory is still missing; once it exists, starts the backend and
	// returns false so the timeout is removed.
	// NOTE(review): this re-enters Start (), and therefore base.Start (),
	// a second time -- confirm the base class tolerates that.
	private bool CheckForExistence ()
	{
		if (!Directory.Exists (liferea_dir))
			return true;

		this.Start ();

		return false;
	}

	/////////////////////////////////////////////////

	// someone reported that backup files with abcd~
	// were being generated
	private static bool IsBackupName (string name)
	{
		return name != null && name.EndsWith ("~");
	}

	// Modified/Created event using Inotify

	private void OnInotifyEvent (Inotify.Watch watch,
				     string path,
				     string subitem,
				     string srcpath,
				     Inotify.EventType type)
	{
		// An empty subitem means the event has no file name to act on.
		if (subitem == null || subitem == "" || IsBackupName (subitem))
			return;

		if ((type & Inotify.EventType.CloseWrite) != 0)
			IndexSingleFeed (Path.Combine (path, subitem));
		else if ((type & Inotify.EventType.Delete) != 0)
			Removefeed_file (Path.Combine (path, subitem));
	}

	// Modified/Created event using FSW

	private void OnChanged (object o, FileSystemEventArgs args)
	{
		// Keep the fallback path consistent with the inotify handler.
		if (IsBackupName (args.FullPath))
			return;

		IndexSingleFeed (args.FullPath);
	}

	/////////////////////////////////////////////////

	// Schedules an add task for one feed file, tagged with the file name
	// so that a second request for the same file is dropped while a task
	// with that tag is still known to the scheduler.
	private void IndexSingleFeed (string filename) {
		if (ThisScheduler.ContainsByTag (filename)) {
			Logger.Log.Debug ("Not adding task for already running task: {0}", filename);
			return;
		}

		FeedIndexableGenerator generator = new FeedIndexableGenerator (this, filename);
		Scheduler.Task task;
		task = NewAddTask (generator);
		task.Tag = filename;
		task.Source = this;
		ThisScheduler.Add (task);
	}

	// Schedules an immediate-priority remove task for a deleted feed file.
	private void Removefeed_file (string file) {
		Logger.Log.Debug ("Removing Liferea feed_file:" + file);
		Uri uri = UriFu.PathToFileUri (file);
		Scheduler.Task task = NewRemoveTask (uri);
		task.Priority = Scheduler.Priority.Immediate;
		task.SubPriority = 0;
		ThisScheduler.Add (task);
	}
}
/**
 * Indexable generator for Liferea Feeds
 *
 * Reads one Liferea feed cache file.  The constructor consumes the
 * feed-level header elements (feedSource, feedTitle) and stops at the
 * first <item>; HasNextIndexable / GetNextIndexable then hand out one
 * Indexable per <item> element.
 */
public class FeedIndexableGenerator : IIndexableGenerator {
	private string feed_file;
	private LifereaQueryable queryable;

	private XmlTextReader reader;
	// False once the file is known to be up to date, unreadable or exhausted.
	private bool is_valid_file = true;

	private string feed_source = "";
	private string publisher = "";
	private Item current_item;
	private XmlSerializer serializer;

	// queryable: owner, used for its shared serializer and attribute store.
	// feed_file: path of the feed cache file to read.
	public FeedIndexableGenerator (LifereaQueryable queryable, string feed_file)
	{
		this.queryable = queryable;
		this.feed_file = feed_file;
		this.serializer = queryable.Serializer;
		ReadFeedHeader ();
	}

	public void PostFlushHook ()
	{
		current_item = null;
		//queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
	}

	public string StatusName {
		get { return feed_file; }
	}

	private bool IsUpToDate (string path)
	{
		return queryable.FileAttributesStore.IsUpToDate (path);
	}

	// Marks the file as finished/unusable and releases the reader if one
	// was opened.
	private void CloseFeed ()
	{
		is_valid_file = false;
		if (reader != null)
			reader.Close ();
	}

	// Opens the feed file and reads feedSource / feedTitle, leaving the
	// reader positioned on the first <item> (or on whatever non-element
	// node ends the header).  Files that are up to date, cannot be read
	// or are not well-formed are flagged invalid so that
	// HasNextIndexable () returns false.
	private void ReadFeedHeader () {

		if (IsUpToDate (feed_file)) {
			is_valid_file = false;
			return;
		}

		try {
			Logger.Log.Debug ("Opening liferea feed file: {0}", feed_file);
			reader = new XmlTextReader (feed_file);
			reader.WhitespaceHandling = WhitespaceHandling.None;

			is_valid_file = true;

			// move to beginning of document
			reader.MoveToContent();
			// move to <feed> node
			reader.ReadStartElement ("feed");

			do {
				string elementName = reader.Name;
				if (elementName == "item")
					break;
				switch (elementName) {
				case "feedSource":
					reader.ReadStartElement ("feedSource");
					feed_source = reader.ReadString ();
					reader.ReadEndElement ();
					break;
				case "feedTitle":
					reader.ReadStartElement ("feedTitle");
					publisher = reader.ReadString ();
					reader.ReadEndElement ();
					break;
				// ignore other elements
				default:
					reader.ReadOuterXml ();
					break;
				}
			} while (!reader.EOF && reader.NodeType == XmlNodeType.Element);
		} catch (XmlException ex) {
			Logger.Log.Warn (ex, "Caught exception parsing feed file:");
			CloseFeed ();
		} catch (IOException ex) {
			// The file can disappear between the change event and
			// this read; treat that like any other unreadable feed
			// instead of letting it escape from the constructor.
			Logger.Log.Warn (ex, "Caught exception parsing feed file:");
			CloseFeed ();
		}
	}

	// Tries to read the next <item> into current_item.  Returns true if
	// one was read; otherwise closes the reader and returns false.
	public bool HasNextIndexable ()
	{
		current_item = null;
		if (!is_valid_file || reader == null)
			return false;

		try {
			// check if the reader is at the startnode
			if (reader.NodeType == XmlNodeType.Element) {
				string itemString = reader.ReadOuterXml ();
				// form node object from the <node>...</node> string
				// FIXME Deserialize(...) is expensive - remove it altogether
				current_item = (Item) serializer.Deserialize (new StringReader (itemString));
			}
		} catch (XmlException) {
			// probably no more <item>
		} catch (InvalidOperationException) {
			// XmlSerializer.Deserialize reports malformed input with
			// this type (the parser error is the inner exception);
			// handle it the same way as an XmlException.
		}

		if (current_item == null)
			CloseFeed ();

		return is_valid_file;
	}

	// Returns the Indexable for the item read by the last
	// HasNextIndexable () call, or null if there is none.
	public Indexable GetNextIndexable ()
	{
		return (current_item != null) ? current_itemToIndexable () : null;
	}

	// Builds the Indexable for current_item.
	private Indexable current_itemToIndexable ()
	{
		Indexable indexable;
		try {
			indexable = new Indexable (new Uri (String.Format ("{0};item={1}", feed_source, current_item.Source)));
		} catch (System.UriFormatException) {
			// feed_source was not usable as a URI on its own; fall
			// back to a dummy liferea:// URI carrying the same data.
			indexable = new Indexable (new Uri (String.Format ("liferea://dummy?{0};item={1}", feed_source, current_item.Source)));
		}
		indexable.ParentUri = UriFu.PathToFileUri (feed_file);
		indexable.MimeType = "text/html";
		indexable.HitType = "FeedItem";

		// The item's time value is applied as seconds from 1970-01-01.
		DateTime date = new DateTime (1970, 1, 1);
		date = date.AddSeconds (current_item.Timestamp);
		indexable.Timestamp = date;

		// cleaning up the property names as far as possible
		// this way querying for specific field is possible
		// following DC element names wherever applicable

		indexable.AddProperty (Property.New ("dc:title", current_item.Title));

		// Attribs stays null when the item has no <attributes> element.
		Attribute[] attribs = null;
		if (current_item.Attribs != null)
			attribs = current_item.Attribs.AttribArray;
		if (attribs != null) {
			foreach (Attribute attrib in attribs) {
				if (attrib.Name != "author")
					continue;
				indexable.AddProperty (Property.New ("dc:creator", attrib.Value));
			}
		}
		indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Source));
		indexable.AddProperty (Property.NewKeyword ("dc:source", feed_source));
		indexable.AddProperty (Property.New ("dc:publisher", publisher));

		// StringReader rejects null, so index an empty body instead.
		string description = current_item.Description;
		if (description == null)
			description = "";
		StringReader description_reader = new StringReader (description);
		indexable.SetTextReader (description_reader);

		return indexable;
	}
}
// One <item> element of a Liferea feed cache file, in the shape
// XmlSerializer maps it to.  Members other than Attribs and Timestamp
// default to the empty string.
[XmlRoot ("item", Namespace="", IsNullable=false)]
[XmlType ("item", Namespace="")]
public class Item {
	// <title> element
	[XmlElement ("title")]
	public string Title = "";

	// <description> element
	[XmlElement ("description")]
	public string Description = "";

	// <source> element
	[XmlElement ("source")]
	public string Source = "";

	// <attributes> element; has no initializer, so it stays null unless set
	[XmlElement ("attributes")]
	public Attributes Attribs;

	// <time> element
	[XmlElement ("time")]
	public long Timestamp;
}
// The <attributes> child of an item: a list of <attribute> elements.
public class Attributes {
	// One entry per <attribute> element; has no initializer, so it
	// stays null unless set.
	[XmlElement ("attribute")]
	public Attribute[] AttribArray;
}
// A single <attribute name="...">value</attribute> element.
// NOTE(review): the name matches System.Attribute; code that needs the
// framework type alongside this one has to qualify it.
public class Attribute {
	// The "name" XML attribute
	[XmlAttribute ("name")]
	public string Name = "";

	// The element's text content
	[XmlTextAttribute]
	public string Value = "";
}