* tools/Info.cs: Add --list-backends, --list-static-indexes to
[beagle.git] / beagled / LifereaQueryable / LifereaQueryable.cs
blob6b69499951c0ea665e7d4511a1831dc2da77516e
1 //
2 // LifereaQueryable.cs
3 //
4 // Copyright (C) 2005 Carl-Emil Lagerstedt
5 // Copyright (C) 2005 Novell, Inc.
6 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Collections;
30 using System.Threading;
32 using System.Xml;
33 using System.Xml.Serialization;
35 using Beagle.Daemon;
36 using Beagle.Util;
38 namespace Beagle.Daemon.LifereaQueryable {
40 [QueryableFlavor (Name="Liferea", Domain=QueryDomain.Local, RequireInotify=false)]
41 public class LifereaQueryable : LuceneFileQueryable {
43 private static Logger log = Logger.Get ("LifereaQueryable");
45 string liferea_dir;
private XmlSerializer serializer = null;

// XmlSerializer for Item objects. It is created on first access and the
// same instance is returned afterwards; FeedIndexableGenerator picks it
// up in its constructor.
public XmlSerializer Serializer {
	get {
		if (serializer != null)
			return serializer;

		serializer = new XmlSerializer (typeof (Item));
		return serializer;
	}
}
// add versioning info
// v1: change property names to match DC element names
private const int INDEX_VERSION = 1;

// Creates the backend on top of the "LifereaIndex" index and works out
// where the feed cache lives: <HomeDir>/.liferea/cache/feeds
public LifereaQueryable () : base ("LifereaIndex", INDEX_VERSION)
{
	string cache_dir = Path.Combine (Path.Combine (PathFinder.HomeDir, ".liferea"), "cache");
	liferea_dir = Path.Combine (cache_dir, "feeds");
}
67 /////////////////////////////////////////////////
// Starts the base queryable, then hands the actual setup and crawl
// (StartWorker) to an ExceptionHandlingThread.
public override void Start ()
{
	base.Start ();

	ThreadStart worker = new ThreadStart (StartWorker);
	ExceptionHandlingThread.Start (worker);
}
// Sets up change notification for the Liferea feed directory and then
// schedules every feed file already present for indexing.
//
// If the feed directory does not exist yet, a 60 second GLib timeout is
// installed that calls CheckForExistence, and nothing else is done.
private void StartWorker ()
{
	if (!Directory.Exists (liferea_dir)) {
		GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
		return;
	}

	if (Inotify.Enabled) {
		Inotify.EventType mask = Inotify.EventType.CloseWrite
			| Inotify.EventType.Delete;

		Inotify.Subscribe (liferea_dir, OnInotifyEvent, mask);
	} else {
		// Fall back to FileSystemWatcher when inotify is not available.
		FileSystemWatcher fsw = new FileSystemWatcher ();
		fsw.Path = liferea_dir;

		fsw.Changed += new FileSystemEventHandler (OnChanged);
		fsw.Created += new FileSystemEventHandler (OnChanged);

		fsw.EnableRaisingEvents = true;
	}

	log.Info ("Scanning Liferea feeds...");

	State = QueryableState.Crawling;
	Stopwatch stopwatch = new Stopwatch ();
	stopwatch.Start ();

	DirectoryInfo dir = new DirectoryInfo (liferea_dir);
	int count = 0;
	foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir)) {
		IndexSingleFeed (file.FullName);
		// Keep the tally that is reported below; it used to stay at 0.
		++count;
	}

	State = QueryableState.Idle;
	stopwatch.Stop ();
	log.Info ("{0} files will be parsed (scanned in {1})", count, stopwatch);
}
// Timeout callback installed by StartWorker while the feed directory is
// missing. Returns true as long as the directory still does not exist;
// once it appears, Start () is called and false is returned.
// (Presumably GLib keeps invoking the handler while it returns true and
// drops it on false -- confirm against the GLib timeout documentation.)
private bool CheckForExistence ()
{
	if (Directory.Exists (liferea_dir)) {
		this.Start ();
		return false;
	}

	return true;
}
125 /////////////////////////////////////////////////
127 // Modified/Created event using Inotify
// Inotify callback for the feed directory. A close-after-write schedules
// the file for (re)indexing; a delete schedules its removal from the
// index. Events without a file name and backup files are ignored.
private void OnInotifyEvent (Inotify.Watch watch,
			     string path,
			     string subitem,
			     string srcpath,
			     Inotify.EventType type)
{
	// Backup files named like "abcd~" were reported to show up in the
	// feed directory; they must not be indexed.
	bool is_backup = subitem.EndsWith ("~");
	if (subitem == "" || is_backup)
		return;

	bool written = (type & Inotify.EventType.CloseWrite) != 0;
	bool deleted = (type & Inotify.EventType.Delete) != 0;
	if (!written && !deleted)
		return;

	string feed_path = Path.Combine (path, subitem);
	if (written) {
		// CloseWrite wins when both flags are set
		IndexSingleFeed (feed_path);
	} else {
		Removefeed_file (feed_path);
	}
}
146 // Modified/Created event using FSW
// FileSystemWatcher callback for changed or newly created files in the
// feed directory; schedules the file for indexing.
private void OnChanged (object o, FileSystemEventArgs args)
{
	// Skip editor backup files ("abcd~"), exactly as OnInotifyEvent does,
	// so both notification paths index the same set of files.
	if (args.FullPath.EndsWith ("~"))
		return;

	IndexSingleFeed (args.FullPath);
}
153 /////////////////////////////////////////////////
// Schedules one feed file for indexing. The task is tagged with the file
// name, and no second task is added while the scheduler still holds one
// with the same tag.
private void IndexSingleFeed (string filename) {
	bool already_scheduled = ThisScheduler.ContainsByTag (filename);
	if (already_scheduled) {
		Logger.Log.Debug ("Not adding task for already running task: {0}", filename);
		return;
	}

	Scheduler.Task task = NewAddTask (new FeedIndexableGenerator (this, filename));
	task.Tag = filename;
	task.Source = this;
	ThisScheduler.Add (task);
}
// Schedules removal of a deleted feed file from the index, as an
// immediate-priority task.
private void Removefeed_file (string file) {
	Logger.Log.Debug ("Removing Liferea feed_file:" + file);

	Scheduler.Task removal = NewRemoveTask (UriFu.PathToFileUri (file));
	removal.Priority = Scheduler.Priority.Immediate;
	removal.SubPriority = 0;
	ThisScheduler.Add (removal);
}
// Indexable generator for Liferea feeds
184 public class FeedIndexableGenerator : IIndexableGenerator {
185 private string feed_file;
186 private LifereaQueryable queryable;
188 private XmlTextReader reader;
189 private bool is_valid_file = true;
191 private string feed_source = "";
192 private string publisher = "";
193 private Item current_item;
194 private XmlSerializer serializer;
// Creates a generator for a single feed file. It borrows the queryable's
// XmlSerializer and reads the feed header right away, so the generator is
// ready for HasNextIndexable () as soon as it is constructed.
public FeedIndexableGenerator (LifereaQueryable queryable, string feed_file)
{
	this.feed_file = feed_file;
	this.queryable = queryable;
	serializer = queryable.Serializer;

	ReadFeedHeader ();
}
// Drops the reference to the item handled last.
public void PostFlushHook ()
{
	this.current_item = null;
	// Disabled: recording the feed file's last write time.
	//queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
}
// Name reported for this generator: the path of the feed file.
public string StatusName {
	get {
		return this.feed_file;
	}
}
// Asks the queryable's FileAttributesStore whether the given path is
// already up to date.
private bool IsUpToDate (string path)
{
	bool up_to_date = queryable.FileAttributesStore.IsUpToDate (path);
	return up_to_date;
}
// Opens the feed file and consumes its header, stopping at the first
// <item> element (or wherever the run of header elements ends).
//
// <feedSource> is stored in feed_source and <feedTitle> in publisher;
// every other header element is skipped. When the file is already up to
// date, or cannot be opened or parsed, is_valid_file is set to false so
// that HasNextIndexable () reports no items.
private void ReadFeedHeader () {

	if (IsUpToDate (feed_file)) {
		is_valid_file = false;
		return;
	}

	try {
		Logger.Log.Debug ("Opening liferea feed file: {0}", feed_file);
		reader = new XmlTextReader (feed_file);
		reader.WhitespaceHandling = WhitespaceHandling.None;

		is_valid_file = true;

		// move to beginning of document
		reader.MoveToContent();
		// move to <feed> node
		reader.ReadStartElement ("feed");

		do {
			string elementName = reader.Name;
			if (elementName == "item")
				break;
			switch (elementName) {
			case "feedSource":
				// ReadElementString reads start tag, text and end tag in
				// one go and also accepts an empty <feedSource/>.
				feed_source = reader.ReadElementString ("feedSource");
				break;
			case "feedTitle":
				publisher = reader.ReadElementString ("feedTitle");
				break;
			// ignore other elements
			default:
				reader.ReadOuterXml ();
				break;
			}
		} while (!reader.EOF && reader.NodeType == XmlNodeType.Element);
	} catch (XmlException ex) {
		Logger.Log.Debug ("Invalid feed file: " + ex.Message);
		AbandonFeed ();
	} catch (IOException ex) {
		// The feed file may have been removed or become unreadable
		// between the change notification and this point.
		Logger.Log.Debug ("Unable to read feed file: " + ex.Message);
		AbandonFeed ();
	}
}

// Marks the feed as unusable and releases the reader if one was opened.
private void AbandonFeed ()
{
	is_valid_file = false;
	if (reader != null) {
		reader.Close ();
		reader = null;
	}
}
// Tries to read the next <item> from the feed into current_item.
//
// Returns true when an item was read and is waiting to be fetched through
// GetNextIndexable (). Returns false when the feed is invalid or when no
// further item could be read; in the latter case the reader is closed and
// the generator stays exhausted.
public bool HasNextIndexable ()
{
	current_item = null;
	if (!is_valid_file || reader == null)
		return false;

	try {
		// check if the reader is at the startnode
		if (reader.NodeType == XmlNodeType.Element) {
			string itemString = reader.ReadOuterXml ();
			// form node object from the <node>...</node> string
			// FIXME Deserialize(...) is expensive - remove it altogether
			current_item = (Item) serializer.Deserialize (new StringReader (itemString));
		}
	} catch (XmlException) {
		// probably no more <item>
	} catch (InvalidOperationException ex) {
		// XmlSerializer.Deserialize reports input it cannot turn into an
		// Item this way (the cause is in InnerException). Treat it like
		// the end of the feed instead of letting it escape.
		Logger.Log.Debug ("Unable to parse item in " + feed_file + ": " + ex.Message);
	}

	if (current_item == null) {
		//Logger.Log.Debug ("LifereaQ: Probably no more feeds left in " + feed_file);
		is_valid_file = false;
		reader.Close ();
	}

	return is_valid_file;
}
// Returns the Indexable for the item read by the last HasNextIndexable ()
// call, or null when there is no current item.
public Indexable GetNextIndexable ()
{
	if (current_item == null)
		return null;

	return current_itemToIndexable ();
}
// Builds the Indexable for current_item.
//
// The URI is "<feed source>;item=<item source>"; if that is not a valid
// URI it is wrapped into a liferea://dummy? URI instead. The item
// description becomes the indexed text, and title, author(s), date,
// identifier, source and publisher are attached as Dublin Core properties.
private Indexable current_itemToIndexable ()
{
	Indexable indexable;
	try {
		indexable = new Indexable (new Uri (String.Format ("{0};item={1}", feed_source, current_item.Source)));
	} catch (System.UriFormatException) {
		indexable = new Indexable (new Uri (String.Format ("liferea://dummy?{0};item={1}", feed_source, current_item.Source)));
	}
	indexable.ParentUri = UriFu.PathToFileUri (feed_file);
	indexable.MimeType = "text/html";
	indexable.HitType = "FeedItem";

	// The item timestamp is a count of seconds added to 1970-01-01.
	DateTime date = new DateTime (1970, 1, 1);
	date = date.AddSeconds (current_item.Timestamp);
	indexable.Timestamp = date;

	// cleaning up the property names as far as possible
	// this way querying for specific field is possible
	// following DC element names wherever applicable

	indexable.AddProperty (Property.New ("dc:title", current_item.Title));

	// Attribs stays null when the item has no <attributes> element, so it
	// has to be checked before the array is touched.
	Attribute[] attribs = null;
	if (current_item.Attribs != null)
		attribs = current_item.Attribs.AttribArray;
	if (attribs != null) {
		foreach (Attribute attrib in attribs) {
			if (attrib.Name != "author")
				continue;
			indexable.AddProperty (Property.New ("dc:creator", attrib.Value));
		}
	}
	indexable.AddProperty (Property.NewDate ("dc:date", date));
	indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Source));
	indexable.AddProperty (Property.NewKeyword ("dc:source", feed_source));
	indexable.AddProperty (Property.New ("dc:publisher", publisher));

	// Named so that it does not hide the XmlTextReader field "reader".
	StringReader description_reader = new StringReader (current_item.Description);
	indexable.SetTextReader (description_reader);

	return indexable;
}
// One <item> element of a Liferea feed cache file, as read by
// XmlSerializer.
[XmlRoot ("item", Namespace="", IsNullable=false)]
[XmlType ("item", Namespace="")]
public class Item {
	[XmlElement ("title")]
	public string Title = "";

	[XmlElement ("description")]
	public string Description = "";

	[XmlElement ("source")]
	public string Source = "";

	// No initializer: remains null unless set during deserialization.
	[XmlElement ("attributes")]
	public Attributes Attribs;

	// Added as seconds to 1970-01-01 when the Indexable is built.
	[XmlElement ("time")]
	public long Timestamp;
}
// The <attributes> element of an item: a list of <attribute> children.
public class Attributes {
	[XmlElement ("attribute")]
	public Attribute[] AttribArray;
}
// A single <attribute name="...">value</attribute> entry of an item.
public class Attribute {
	// Taken from the "name" XML attribute.
	[XmlAttribute ("name")]
	public string Name = "";

	// Taken from the element's text content.
	[XmlTextAttribute]
	public string Value = "";
}