4 // Copyright (C) 2005 Carl-Emil Lagerstedt
5 // Copyright (C) 2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
30 using System
.Threading
;
33 using System
.Xml
.Serialization
;
38 namespace Beagle
.Daemon
.LifereaQueryable
{
// Queryable backend for Liferea feed files. The QueryableFlavor attribute
// registers it under the name "Liferea" in the local query domain and
// declares that inotify is not required. Derives from LuceneFileQueryable.
40 [QueryableFlavor (Name
="Liferea", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
41 public class LifereaQueryable
: LuceneFileQueryable
{
// Class-wide logger obtained under the name "LifereaQueryable".
43 private static Logger log
= Logger
.Get ("LifereaQueryable");
// Cached XmlSerializer for the Item type; starts out null.
47 private XmlSerializer serializer
= null;
// Serializer property: when the cached field is still null, an
// XmlSerializer for typeof (Item) is created and stored in it.
// NOTE(review): the accessor keyword and the return statement are not
// visible in this view -- presumably a getter that returns the cached
// instance; confirm against the full file.
48 public XmlSerializer Serializer
{
50 if (serializer
== null)
51 serializer
= new XmlSerializer (typeof (Item
));
// Index version number handed to the base constructor together with the
// index name. Version history as recorded by the original author:
56 // add versioning info
57 // v1: change property names to match DC element names
58 // v2: remove dc:date, use Timestamp property.
59 private const int INDEX_VERSION
= 2;
// Constructor. Passes the index name "LifereaIndex" and INDEX_VERSION to the
// base class, then builds liferea_dir step by step with Path.Combine as
// <PathFinder.HomeDir>/.liferea/cache/feeds.
// NOTE(review): the declaration of liferea_dir is not visible in this view.
61 public LifereaQueryable () : base ("LifereaIndex", INDEX_VERSION
)
63 liferea_dir
= Path
.Combine (PathFinder
.HomeDir
, ".liferea");
64 liferea_dir
= Path
.Combine (liferea_dir
, "cache");
65 liferea_dir
= Path
.Combine (liferea_dir
, "feeds");
68 /////////////////////////////////////////////////
// Start override: wraps StartWorker in a ThreadStart delegate and hands it to
// ExceptionHandlingThread.Start.
// NOTE(review): other statements in the body, if any, are not visible in
// this view.
70 public override void Start ()
74 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker routine passed to ExceptionHandlingThread.Start by Start ().
// Visible steps: (1) if liferea_dir does not exist, register
// CheckForExistence as a GLib timeout handler; (2) set up change
// notification through Inotify and/or a FileSystemWatcher; (3) walk
// liferea_dir and call IndexSingleFeed for every file, with State set to
// Crawling during the walk and Idle afterwards.
// NOTE(review): several lines of this method are not visible in this view
// (block terminators, any early return, the branch keyword between the
// Inotify and FileSystemWatcher setup, and the declaration/update of
// `count`). Confirm control flow against the full file.
77 private void StartWorker ()
// Feed directory is missing: schedule CheckForExistence with an interval of
// 60000 -- presumably milliseconds, i.e. one minute; confirm against GLib#.
79 if (!Directory
.Exists (liferea_dir
)) {
80 GLib
.Timeout
.Add (60000, new GLib
.TimeoutHandler (CheckForExistence
));
// Inotify path: subscribe OnInotifyEvent for CloseWrite and Delete events on
// liferea_dir.
84 if (Inotify
.Enabled
) {
85 Inotify
.EventType mask
= Inotify
.EventType
.CloseWrite
86 | Inotify
.EventType
.Delete
;
88 Inotify
.Subscribe (liferea_dir
, OnInotifyEvent
, mask
);
// FileSystemWatcher setup: OnChanged handles both Changed and Created events
// for liferea_dir. Presumably this is the fallback when Inotify is not
// enabled -- the branch keyword is not visible here.
90 FileSystemWatcher fsw
= new FileSystemWatcher ();
91 fsw
.Path
= liferea_dir
;
93 fsw
.Changed
+= new FileSystemEventHandler (OnChanged
);
94 fsw
.Created
+= new FileSystemEventHandler (OnChanged
);
96 fsw
.EnableRaisingEvents
= true;
99 log
.Info ("Scanning Liferea feeds...");
// Initial crawl over every file that DirectoryWalker reports for liferea_dir.
101 State
= QueryableState
.Crawling
;
102 Stopwatch stopwatch
= new Stopwatch ();
105 DirectoryInfo dir
= new DirectoryInfo (liferea_dir
);
107 foreach (FileInfo file
in DirectoryWalker
.GetFileInfos (dir
)) {
108 IndexSingleFeed (file
.FullName
);
111 State
= QueryableState
.Idle
;
// NOTE(review): no update of `count` is visible inside the loop above, so
// verify that the number logged here reflects the files actually scanned.
113 log
.Info ("{0} files will be parsed (scanned in {1})", count
, stopwatch
);
// Timeout callback that StartWorker registers through GLib.Timeout.Add.
// Tests whether liferea_dir exists.
// NOTE(review): the statements guarded by this test and the returned values
// are not visible in this view.
116 private bool CheckForExistence ()
118 if (!Directory
.Exists (liferea_dir
))
126 /////////////////////////////////////////////////
128 // Modified/Created event using Inotify
// Inotify callback subscribed in StartWorker for CloseWrite and Delete events.
// CloseWrite events lead to IndexSingleFeed and Delete events to
// Removefeed_file, each called with Path.Combine (path, subitem).
// NOTE(review): only the `watch` and `type` parameters are visible here;
// `path` and `subitem` are presumably further parameters declared on lines
// missing from this view.
130 private void OnInotifyEvent (Inotify
.Watch watch
,
134 Inotify
.EventType type
)
136 // someone reported that backup files with abcd~
137 // were being generated
// Test for an empty name or a name ending in "~". The statement it guards is
// not visible here -- presumably such entries are ignored; confirm.
138 if (subitem
== "" || subitem
.EndsWith ("~"))
141 if ((type
& Inotify
.EventType
.CloseWrite
) != 0)
142 IndexSingleFeed (Path
.Combine (path
, subitem
));
143 else if ((type
& Inotify
.EventType
.Delete
) != 0)
144 Removefeed_file (Path
.Combine (path
, subitem
));
147 // Modified/Created event using FSW
// FileSystemWatcher handler attached to both Changed and Created in
// StartWorker; passes the full path of the affected file to IndexSingleFeed.
// The sender argument `o` is not used in the visible body.
149 private void OnChanged (object o
, FileSystemEventArgs args
)
151 IndexSingleFeed (args
.FullPath
);
154 /////////////////////////////////////////////////
// Schedules indexing of a single feed file: builds a FeedIndexableGenerator
// for `filename`, wraps it in an add-task via NewAddTask and adds the task
// to ThisScheduler.
// NOTE(review): the declaration of `task`, any tagging of the task, and the
// statement that ends the "already running" branch are not visible in this
// view.
156 private void IndexSingleFeed (string filename
) {
// The scheduler already holds a task tagged with this filename: log it.
// Presumably the method returns here without adding a second task; confirm.
157 if (ThisScheduler
.ContainsByTag (filename
)) {
158 Logger
.Log
.Debug ("Not adding task for already running task: {0}", filename
);
162 FeedIndexableGenerator generator
= new FeedIndexableGenerator (this, filename
);
164 task
= NewAddTask (generator
);
167 ThisScheduler
.Add (task
);
// Schedules removal of a feed file from the index: logs the path, converts it
// to a file URI with UriFu.PathToFileUri, creates a remove-task via
// NewRemoveTask, gives it Immediate priority with sub-priority 0 and adds it
// to ThisScheduler.
170 private void Removefeed_file (string file
) {
171 Logger
.Log
.Debug ("Removing Liferea feed_file:" + file
);
172 Uri uri
= UriFu
.PathToFileUri (file
);
173 Scheduler
.Task task
= NewRemoveTask (uri
);
174 task
.Priority
= Scheduler
.Priority
.Immediate
;
175 task
.SubPriority
= 0;
176 ThisScheduler
.Add (task
);
183 * Indexable generator for Liferea Feeds
// IIndexableGenerator implementation that reads one Liferea feed file with
// an XmlTextReader and turns its <item> elements into Indexable objects.
185 public class FeedIndexableGenerator
: IIndexableGenerator
{
// Path of the feed file handled by this generator (set in the constructor).
186 private string feed_file
;
// Owning queryable; supplies the shared Serializer and FileAttributesStore.
187 private LifereaQueryable queryable
;
// XML reader over feed_file; assigned in ReadFeedHeader.
189 private XmlTextReader reader
;
// Cleared when the file is up to date, fails to parse, or yields no further
// item; HasNextIndexable returns this flag.
190 private bool is_valid_file
= true;
// Text of the <feedSource> element, read in ReadFeedHeader.
192 private string feed_source
= "";
// Text of the <feedTitle> element, read in ReadFeedHeader.
193 private string publisher
= "";
// Item most recently deserialized by HasNextIndexable.
194 private Item current_item
;
// Serializer taken from queryable.Serializer in the constructor.
195 private XmlSerializer serializer
;
// Constructor: stores the owning queryable and the feed file path, and takes
// the XmlSerializer from queryable.Serializer.
// NOTE(review): further statements (for example a call that reads the feed
// header) may exist on lines not visible in this view.
197 public FeedIndexableGenerator (LifereaQueryable queryable
, string feed_file
)
199 this.queryable
= queryable
;
200 this.feed_file
= feed_file
;
201 this.serializer
= queryable
.Serializer
;
// Post-flush hook. The only body content visible in this view is the
// commented-out AttachLastWriteTime call below, i.e. no active statement.
205 public void PostFlushHook ()
208 //queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
// Status name of this generator: the path of the feed file being processed.
211 public string StatusName
{
212 get { return feed_file; }
// Returns the result of queryable.FileAttributesStore.IsUpToDate for `path`.
215 private bool IsUpToDate (string path
)
217 return queryable
.FileAttributesStore
.IsUpToDate (path
);
// Opens feed_file and consumes the feed header. Marks the file invalid when
// IsUpToDate reports it as up to date; otherwise creates the XmlTextReader,
// enters the <feed> element and reads child elements, capturing <feedSource>
// into feed_source and <feedTitle> into publisher and skipping other
// elements. An XmlException is logged as a warning and marks the file
// invalid.
// NOTE(review): the loop/try openers, the case labels, the statement taken
// when the element is "item" (presumably leaving the loop so the reader
// stays positioned on the first item) and the end of the up-to-date branch
// are not visible in this view. No Close/Dispose of `reader` is visible
// anywhere in this class as shown -- confirm the reader is released.
220 private void ReadFeedHeader () {
222 if (IsUpToDate (feed_file
)) {
223 is_valid_file
= false;
227 Logger
.Log
.Debug ("Opening liferea feed file: {0}", feed_file
);
228 reader
= new XmlTextReader (feed_file
);
// Whitespace nodes are not reported by the reader.
229 reader
.WhitespaceHandling
= WhitespaceHandling
.None
;
231 is_valid_file
= true;
233 // move to beginning of document
234 reader
.MoveToContent();
235 // move to <feed> node
236 reader
.ReadStartElement ("feed");
// Dispatch on the name of the element the reader is currently positioned at.
239 string elementName
= reader
.Name
;
240 if (elementName
== "item")
242 switch (elementName
) {
244 reader
.ReadStartElement ("feedSource");
245 feed_source
= reader
.ReadString ();
246 reader
.ReadEndElement ();
249 reader
.ReadStartElement ("feedTitle");
250 publisher
= reader
.ReadString ();
251 reader
.ReadEndElement ();
253 // ignore other elements
// ReadOuterXml is used only to advance past the element; its result is
// discarded.
255 reader
.ReadOuterXml ();
258 } while (!reader
.EOF
&& reader
.NodeType
== XmlNodeType
.Element
);
259 } catch (XmlException ex
) {
260 Logger
.Log
.Warn (ex
, "Caught exception parsing feed file:");
261 is_valid_file
= false;
// Tries to read the next item from the feed. When the reader is positioned on
// an element, its outer XML is read and deserialized into current_item with
// the shared XmlSerializer. If current_item is null afterwards the file is
// marked invalid. Returns is_valid_file.
// NOTE(review): the statement guarded by the first test (presumably an early
// `return false`), the try opener and any reset of current_item before
// reading are not visible in this view.
266 public bool HasNextIndexable ()
269 if (!is_valid_file
|| reader
== null)
271 string itemString
= "";
273 // check if the reader is at the startnode
274 if (reader
.NodeType
== XmlNodeType
.Element
) {
275 itemString
= reader
.ReadOuterXml ();
276 // form node object from the <node>...</node> string
277 // FIXME Deserialize(...) is expensive - remove it altogether
278 current_item
= (Item
) serializer
.Deserialize (new StringReader (itemString
));
// NOTE(review): XmlSerializer.Deserialize is documented to report failures as
// InvalidOperationException (with the cause as InnerException), which this
// clause would not intercept; `ex` is also unused in the visible handler.
// Confirm the intended exception type.
280 } catch (XmlException ex
) {
281 // probably no more <item>
284 if (current_item
== null) {
285 //Logger.Log.Debug ("LifereaQ: Probably no more feeds left in " + feed_file);
286 //Logger.Log.Debug ("Causing string = " + itemString);
288 is_valid_file
= false;
291 return is_valid_file
;
// Returns the Indexable built from current_item when an item is available.
// NOTE(review): the value returned when current_item is null is not visible
// in this view.
294 public Indexable
GetNextIndexable ()
296 if (current_item
!= null)
297 return current_itemToIndexable ();
// Converts current_item into an Indexable: builds its URI from feed_source
// and the item source, sets the parent URI, MIME type, hit type and
// timestamp, adds Dublin Core style properties and attaches the item
// description as the text to index.
// NOTE(review): the declaration of `indexable`, the try opener and the final
// return statement are not visible in this view.
302 private Indexable
current_itemToIndexable ()
// Preferred URI form is "<feed_source>;item=<item source>".
306 indexable
= new Indexable (new Uri (String
.Format ("{0};item={1}", feed_source
, current_item
.Source
)));
// If that string is not a valid URI, fall back to a liferea://dummy? URI that
// carries the same two values.
307 } catch (System
.UriFormatException
) {
308 indexable
= new Indexable (new Uri (String
.Format ("liferea://dummy?{0};item={1}", feed_source
, current_item
.Source
)));
310 indexable
.ParentUri
= UriFu
.PathToFileUri (feed_file
);
311 indexable
.MimeType
= "text/html";
312 indexable
.HitType
= "FeedItem";
// The item's Timestamp is applied as a number of seconds added to 1970-01-01.
314 DateTime date
= new DateTime (1970, 1, 1);
315 date
= date
.AddSeconds (current_item
.Timestamp
);
316 indexable
.Timestamp
= date
;
318 // cleaning up the property names as far as possible
319 // this way querying for specific field is possible
320 // following DC element names wherever applicable
322 indexable
.AddProperty (Property
.New ("dc:title", current_item
.Title
));
// NOTE(review): Item.Attribs has no initializer and is dereferenced here
// without a null check; only AttribArray is null-checked below. Confirm that
// items lacking an <attributes> element cannot reach this line.
323 Attribute
[] attribs
= current_item
.Attribs
.AttribArray
;
324 if (attribs
!= null) {
325 foreach (Attribute attrib
in attribs
) {
// The line between this test and the AddProperty call is not visible here --
// presumably attributes whose name is not "author" are skipped, so only
// author attributes become dc:creator; confirm.
326 if (attrib
.Name
!= "author")
328 indexable
.AddProperty (Property
.New ("dc:creator", attrib
.Value
));
331 indexable
.AddProperty (Property
.NewKeyword ("dc:identifier", current_item
.Source
));
332 indexable
.AddProperty (Property
.NewKeyword ("dc:source", feed_source
));
333 indexable
.AddProperty (Property
.New ("dc:publisher", publisher
));
// This local StringReader named `reader` is distinct from the XmlTextReader
// field of the same name; it feeds the item description to the indexable.
335 StringReader reader
= new StringReader (current_item
.Description
);
336 indexable
.SetTextReader (reader
);
// XML-serializable representation of one feed <item> element; this is the
// type that FeedIndexableGenerator deserializes into. Root/type name is
// "item" with an empty namespace.
// NOTE(review): the class declaration line itself is not visible in this
// view.
342 [System
.Xml
.Serialization
.XmlRoot("item", Namespace
="", IsNullable
=false)]
343 [System
.Xml
.Serialization
.XmlType("item", Namespace
="")]
// <title>, <description> and <source> map to strings that default to "".
345 [XmlElement ("title")] public string Title
= "";
346 [XmlElement ("description")] public string Description
="";
347 [XmlElement ("source")] public string Source
="";
// <attributes> container; no initializer here.
348 [XmlElement ("attributes")] public Attributes Attribs
;
// <time> value; the generator applies it as seconds added to 1970-01-01.
349 [XmlElement ("time")] public long Timestamp
;
// Container for the <attribute> children of an item's <attributes> element.
352 public class Attributes
{
// One entry per <attribute> element; no initializer here.
353 [XmlElement ("attribute")] public Attribute
[] AttribArray
;
// A single <attribute> element: the "name" XML attribute plus the element's
// text content. Both default to "".
356 public class Attribute
{
357 [XmlAttribute ("name")] public string Name
= "";
358 [XmlTextAttribute
] public string Value
= "";