Oops, fix a scale problem with the RSS size calculation
[beagle.git] / beagled / LifereaQueryable / LifereaQueryable.cs
blob0738e1fa59d7da03d0f41534a9b19107e0059831
1 //
2 // LifereaQueryable.cs
3 //
4 // Copyright (C) 2005 Carl-Emil Lagerstedt
5 // Copyright (C) 2005 Novell, Inc.
6 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Collections;
30 using System.Threading;
32 using System.Xml;
33 using System.Xml.Serialization;
35 using Beagle.Daemon;
36 using Beagle.Util;
38 namespace Beagle.Daemon.LifereaQueryable {
// Queryable backend that indexes the feed cache written by Liferea.
// Feed files are read from ~/.liferea/cache/feeds and favicons are looked
// up in ~/.liferea/cache/favicons (both paths are built in the constructor).
[QueryableFlavor (Name="Liferea", Domain=QueryDomain.Local, RequireInotify=false)]
public class LifereaQueryable : LuceneFileQueryable {

	// Directory holding one cache file per feed.
	string liferea_dir;
	// Directory holding feed favicons; read by FeedIndexableGenerator.
	internal string icon_dir;

	private XmlSerializer serializer = null;

	// Lazily created serializer for Item, shared by every generator that
	// is constructed with this queryable.
	public XmlSerializer Serializer {
		get {
			if (serializer == null)
				serializer = new XmlSerializer (typeof (Item));
			return serializer;
		}
	}

	// add versioning info
	// v1: change property names to match DC element names
	// v2: remove dc:date, use Timestamp property.
	private const int INDEX_VERSION = 2;

	public LifereaQueryable () : base ("LifereaIndex", INDEX_VERSION)
	{
		string cache_dir = Path.Combine (PathFinder.HomeDir, ".liferea");
		cache_dir = Path.Combine (cache_dir, "cache");

		icon_dir = Path.Combine (cache_dir, "favicons");
		liferea_dir = Path.Combine (cache_dir, "feeds");
	}

	/////////////////////////////////////////////////

	// Starts the base queryable, then performs the initial scan on a
	// separate thread.
	public override void Start ()
	{
		base.Start ();

		ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
	}

	// Installs a watch on the feed directory and schedules every feed file
	// found there for indexing. If the directory does not exist yet, an
	// existence check is scheduled every 60 seconds instead.
	private void StartWorker ()
	{
		if (!Directory.Exists (liferea_dir)) {
			GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
			return;
		}

		if (Inotify.Enabled) {
			Inotify.EventType mask = Inotify.EventType.CloseWrite
				| Inotify.EventType.Delete;

			Inotify.Subscribe (liferea_dir, OnInotifyEvent, mask);
		} else {
			FileSystemWatcher fsw = new FileSystemWatcher ();
			fsw.Path = liferea_dir;

			fsw.Changed += new FileSystemEventHandler (OnChanged);
			fsw.Created += new FileSystemEventHandler (OnChanged);

			fsw.EnableRaisingEvents = true;
		}

		Log.Info ("Scanning Liferea feeds...");

		Stopwatch stopwatch = new Stopwatch ();
		stopwatch.Start ();

		DirectoryInfo dir = new DirectoryInfo (liferea_dir);
		int count = 0;
		foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir)) {
			IndexSingleFeed (file.FullName);
			// Keep the tally that the summary below reports.
			++count;
		}

		stopwatch.Stop ();
		Log.Info ("{0} files will be parsed (scanned in {1})", count, stopwatch);
	}

	// GLib timeout callback. Returning true keeps the timeout armed while
	// the feed directory is still missing; once it exists, startup is
	// re-run and the timeout is cancelled by returning false.
	private bool CheckForExistence ()
	{
		if (!Directory.Exists (liferea_dir))
			return true;

		this.Start ();

		return false;
	}

	/////////////////////////////////////////////////

	// Modified/Created event using Inotify

	private void OnInotifyEvent (Inotify.Watch watch,
				     string path,
				     string subitem,
				     string srcpath,
				     Inotify.EventType type)
	{
		// someone reported that backup files with abcd~
		// were being generated
		if (subitem == "" || subitem.EndsWith ("~"))
			return;

		if ((type & Inotify.EventType.CloseWrite) != 0)
			IndexSingleFeed (Path.Combine (path, subitem));
		else if ((type & Inotify.EventType.Delete) != 0)
			Removefeed_file (Path.Combine (path, subitem));
	}

	// Modified/Created event using FSW

	private void OnChanged (object o, FileSystemEventArgs args)
	{
		// Skip "name~" backup files, consistent with OnInotifyEvent.
		if (args.FullPath.EndsWith ("~"))
			return;

		IndexSingleFeed (args.FullPath);
	}

	/////////////////////////////////////////////////

	// Schedules an add task for one feed file. The task is tagged with the
	// file name so that a second request for the same file is dropped while
	// the scheduler still holds the first one.
	private void IndexSingleFeed (string filename) {
		if (ThisScheduler.ContainsByTag (filename)) {
			Log.Debug ("Not adding task for already running task: {0}", filename);
			return;
		}

		FeedIndexableGenerator generator = new FeedIndexableGenerator (this, filename);
		Scheduler.Task task;
		task = NewAddTask (generator);
		task.Tag = filename;
		task.Source = this;
		ThisScheduler.Add (task);
	}

	// Schedules an immediate-priority removal task for the file URI of a
	// deleted feed file.
	private void Removefeed_file (string file) {
		Log.Debug ("Removing Liferea feed_file:" + file);
		Uri uri = UriFu.PathToFileUri (file);
		Scheduler.Task task = NewRemoveTask (uri);
		task.Priority = Scheduler.Priority.Immediate;
		task.SubPriority = 0;
		ThisScheduler.Add (task);
	}

}
/**
 * Indexable generator for Liferea Feeds
 *
 * Reads one Liferea feed cache file: the constructor consumes the feed
 * header (feedSource, feedTitle), after which each <item> element is
 * deserialized into an Item and turned into an Indexable on demand.
 */
public class FeedIndexableGenerator : IIndexableGenerator {
	private string feed_file;
	private string icon_file = null;
	private LifereaQueryable queryable;

	private XmlTextReader reader;
	// False once the file is known to be up to date, unparsable or exhausted.
	private bool is_valid_file = true;

	private string feed_source = "";
	private string publisher = "";
	private Item current_item;
	private XmlSerializer serializer;

	// queryable: owner; supplies the shared serializer, the icon directory
	//            and the file attributes store.
	// feed_file: path of the feed cache file to read.
	public FeedIndexableGenerator (LifereaQueryable queryable, string feed_file)
	{
		this.queryable = queryable;
		this.feed_file = feed_file;
		this.serializer = queryable.Serializer;
		ReadFeedHeader ();

		// Set icon file: <icon_dir>/<feed file name without extension>.png
		string file_name = Path.GetFileNameWithoutExtension (feed_file);
		this.icon_file = this.queryable.icon_dir;
		this.icon_file = Path.Combine (icon_file, file_name);
		this.icon_file = Path.ChangeExtension (icon_file, "png");
	}

	public void PostFlushHook ()
	{
		current_item = null;
		//queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
	}

	public string StatusName {
		get { return feed_file; }
	}

	private bool IsUpToDate (string path)
	{
		return queryable.FileAttributesStore.IsUpToDate (path);
	}

	// Opens the feed file and reads the elements that precede the first
	// <item>, remembering the feed source and title. The reader is left
	// positioned on the first <item>, if there is one. Files that are up
	// to date or fail to parse are marked invalid so that
	// HasNextIndexable () reports nothing for them.
	private void ReadFeedHeader () {

		if (IsUpToDate (feed_file)) {
			is_valid_file = false;
			return;
		}

		try {
			Log.Debug ("Opening liferea feed file: {0}", feed_file);
			reader = new XmlTextReader (feed_file);
			reader.WhitespaceHandling = WhitespaceHandling.None;

			is_valid_file = true;

			// move to beginning of document
			reader.MoveToContent();
			// move to <feed> node
			reader.ReadStartElement ("feed");

			do {
				string elementName = reader.Name;
				if (elementName == "item")
					break;
				switch (elementName) {
				case "feedSource":
					reader.ReadStartElement ("feedSource");
					feed_source = reader.ReadString ();
					reader.ReadEndElement ();
					break;
				case "feedTitle":
					reader.ReadStartElement ("feedTitle");
					publisher = reader.ReadString ();
					reader.ReadEndElement ();
					break;
				// ignore other elements
				default:
					reader.ReadOuterXml ();
					break;
				}
			} while (!reader.EOF && reader.NodeType == XmlNodeType.Element);
		} catch (XmlException ex) {
			Log.Warn (ex, "Caught exception parsing feed file:");
			is_valid_file = false;
			// Guard the cleanup so it cannot itself fail if the
			// reader was never assigned.
			if (reader != null)
				reader.Close ();
		}
	}

	// Tries to deserialize the next <item> into current_item. Returns
	// false, and closes the reader, once no further item can be read.
	public bool HasNextIndexable ()
	{
		current_item = null;
		if (!is_valid_file || reader == null)
			return false;

		try {
			// check if the reader is at the startnode
			if (reader.NodeType == XmlNodeType.Element) {
				string item_xml = reader.ReadOuterXml ();
				// form node object from the <node>...</node> string
				// FIXME Deserialize(...) is expensive - remove it altogether
				current_item = (Item) serializer.Deserialize (new StringReader (item_xml));
			}
		} catch (XmlException) {
			// probably no more <item>
		} catch (InvalidOperationException) {
			// XmlSerializer.Deserialize reports malformed input as
			// InvalidOperationException; treat it like the end of
			// the usable items instead of letting it escape.
		}

		if (current_item == null) {
			is_valid_file = false;
			reader.Close ();
		}

		return is_valid_file;
	}

	// Returns the Indexable for the item loaded by the last successful
	// HasNextIndexable () call, or null when there is none.
	public Indexable GetNextIndexable ()
	{
		if (current_item == null)
			return null;

		return current_itemToIndexable ();
	}

	// Builds the Indexable for current_item: URI, parent, timestamp,
	// Dublin Core style properties and the description text.
	private Indexable current_itemToIndexable ()
	{
		Indexable indexable;
		try {
			indexable = new Indexable (new Uri (String.Format ("{0};item={1}", feed_source, current_item.Source)));
		} catch (System.UriFormatException) {
			// The feed source is not usable as a URI on its own;
			// fall back to a dummy liferea:// URI.
			indexable = new Indexable (new Uri (String.Format ("liferea://dummy?{0};item={1}", feed_source, current_item.Source)));
		}
		indexable.ParentUri = UriFu.PathToFileUri (feed_file);
		indexable.MimeType = "text/html";
		indexable.HitType = "FeedItem";

		// The item timestamp is applied as seconds past the Unix epoch.
		DateTime date = DateTimeUtil.UnixToDateTimeUtc (0);
		date = date.AddSeconds (current_item.Timestamp);
		indexable.Timestamp = date;

		// cleaning up the property names as far as possible
		// this way querying for specific field is possible
		// following DC element names wherever applicable

		indexable.AddProperty (Property.New ("dc:title", current_item.Title));

		// Attribs stays null when the item has no <attributes> element,
		// so it must be checked before reading its array.
		Attribute[] attribs = null;
		if (current_item.Attribs != null)
			attribs = current_item.Attribs.AttribArray;
		if (attribs != null) {
			foreach (Attribute attrib in attribs) {
				if (attrib.Name != "author")
					continue;
				indexable.AddProperty (Property.New ("dc:creator", attrib.Value));
			}
		}
		indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Source));
		indexable.AddProperty (Property.NewKeyword ("dc:source", feed_source));
		indexable.AddProperty (Property.New ("dc:publisher", publisher));

		if (File.Exists (icon_file))
			indexable.AddProperty (Property.NewUnsearched ("fixme:cachedimg", icon_file));

		// Named so that it does not shadow the XmlTextReader field.
		StringReader description_reader = new StringReader (current_item.Description);
		indexable.SetTextReader (description_reader);

		return indexable;
	}
}
// One <item> element of a Liferea feed cache file, as mapped by XmlSerializer.
[XmlRoot ("item", Namespace = "", IsNullable = false)]
[XmlType ("item", Namespace = "")]
public class Item {
	// <title>
	[XmlElement ("title")]
	public string Title = String.Empty;

	// <description>
	[XmlElement ("description")]
	public string Description = String.Empty;

	// <source>
	[XmlElement ("source")]
	public string Source = String.Empty;

	// <attributes>; has no initializer, so it is null unless assigned.
	[XmlElement ("attributes")]
	public Attributes Attribs;

	// <time>; FeedIndexableGenerator adds this as seconds to the Unix epoch.
	[XmlElement ("time")]
	public long Timestamp;
}
// Container for the <attribute> children of an item's <attributes> element.
public class Attributes {
	// One entry per <attribute> child; has no initializer, so it is null
	// unless assigned.
	[XmlElement ("attribute")]
	public Attribute[] AttribArray;
}
// A single <attribute name="...">value</attribute> entry of a feed item.
public class Attribute {
	// The "name" XML attribute.
	[XmlAttribute ("name")]
	public string Name = String.Empty;

	// The element's text content.
	[XmlText]
	public string Value = String.Empty;
}