4 // Copyright (C) 2005 Carl-Emil Lagerstedt
5 // Copyright (C) 2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
30 using System
.Threading
;
33 using System
.Xml
.Serialization
;
38 namespace Beagle
.Daemon
.LifereaQueryable
{
// Queryable backend for Liferea feed files. The QueryableFlavor attribute names
// it "Liferea", places it in QueryDomain.Local and sets RequireInotify=false.
// The class derives from LuceneFileQueryable.
40 [QueryableFlavor (Name
="Liferea", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
41 public class LifereaQueryable
: LuceneFileQueryable
{
// Logger obtained under the name "LifereaQueryable".
43 private static Logger log
= Logger
.Get ("LifereaQueryable");
// XmlSerializer for Item objects, kept in a field so it can be reused: the
// property body below constructs it only while the field is still null.
// FeedIndexableGenerator reads this property in its constructor.
// NOTE(review): the accessor and return lines are not visible in this listing;
// presumably a getter that returns the field -- confirm against the full file.
47 private XmlSerializer serializer
= null;
48 public XmlSerializer Serializer
{
50 if (serializer
== null)
51 serializer
= new XmlSerializer (typeof (Item
));
56 // add versioning info
57 // v1: change property names to match DC element names
// Version number handed to the base constructor together with the index name
// "LifereaIndex".
58 private const int INDEX_VERSION
= 1;
// Creates the queryable on the "LifereaIndex" index (version INDEX_VERSION)
// and records the directory that is crawled and watched for feed files:
// PathFinder.HomeDir/.liferea/cache/feeds.
public LifereaQueryable () : base ("LifereaIndex", INDEX_VERSION)
{
	string dot_liferea = Path.Combine (PathFinder.HomeDir, ".liferea");
	string cache_dir = Path.Combine (dot_liferea, "cache");

	liferea_dir = Path.Combine (cache_dir, "feeds");
}
67 /////////////////////////////////////////////////
// Start override: hands StartWorker to ExceptionHandlingThread.Start wrapped in
// a ThreadStart delegate, so the crawl is not run inline by the caller.
// NOTE(review): original lines 70-72 are not visible in this listing
// (possibly a call to the base implementation) -- confirm.
69 public override void Start ()
73 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker started from Start (): sets up change notification for liferea_dir
// and then passes every file found under it to IndexSingleFeed.
// NOTE(review): several lines of this method are not visible in this listing
// (e.g. the declaration of `count` and any else/return lines); the comments
// below only describe what is shown.
76 private void StartWorker ()
// If the feeds directory does not exist yet, register CheckForExistence as a
// GLib timeout handler. 60000 is presumably the interval in milliseconds
// (one minute) -- confirm against GLib.Timeout.Add.
78 if (!Directory
.Exists (liferea_dir
)) {
79 GLib
.Timeout
.Add (60000, new GLib
.TimeoutHandler (CheckForExistence
));
// With inotify enabled, subscribe to CloseWrite and Delete events on the
// feeds directory; OnInotifyEvent handles them.
83 if (Inotify
.Enabled
) {
84 Inotify
.EventType mask
= Inotify
.EventType
.CloseWrite
85 | Inotify
.EventType
.Delete
;
87 Inotify
.Subscribe (liferea_dir
, OnInotifyEvent
, mask
);
// FileSystemWatcher set-up: Changed and Created both go to OnChanged.
// Presumably this is the branch taken when inotify is not enabled (the
// `else` line is not visible) -- confirm.
89 FileSystemWatcher fsw
= new FileSystemWatcher ();
90 fsw
.Path
= liferea_dir
;
92 fsw
.Changed
+= new FileSystemEventHandler (OnChanged
);
93 fsw
.Created
+= new FileSystemEventHandler (OnChanged
);
95 fsw
.EnableRaisingEvents
= true;
// Initial crawl: State is Crawling while the directory is walked and is set
// back to Idle afterwards.
98 log
.Info ("Scanning Liferea feeds...");
100 State
= QueryableState
.Crawling
;
101 Stopwatch stopwatch
= new Stopwatch ();
104 DirectoryInfo dir
= new DirectoryInfo (liferea_dir
);
106 foreach (FileInfo file
in DirectoryWalker
.GetFileInfos (dir
)) {
107 IndexSingleFeed (file
.FullName
);
110 State
= QueryableState
.Idle
;
// Logs `count` and the stopwatch; the lines that maintain them are not shown.
112 log
.Info ("{0} files will be parsed (scanned in {1})", count
, stopwatch
);
// Timeout handler registered by StartWorker while the feeds directory is
// missing; it re-tests whether liferea_dir exists.
// NOTE(review): the return statements and whatever runs once the directory
// appears are not visible in this listing. Presumably the bool result tells
// GLib whether to keep the timeout alive -- confirm.
115 private bool CheckForExistence ()
117 if (!Directory
.Exists (liferea_dir
))
125 /////////////////////////////////////////////////
127 // Modified/Created event using Inotify
// Inotify callback subscribed in StartWorker for CloseWrite and Delete events.
// NOTE(review): the parameter lines that declare `path` and `subitem` are not
// visible in this listing; both are used below.
129 private void OnInotifyEvent (Inotify
.Watch watch
,
133 Inotify
.EventType type
)
// Events with an empty subitem or a name ending in "~" (backup files) are
// filtered here; the statement executed for them is not shown.
135 // someone reported that backup files with abcd~
136 // were being generated
137 if (subitem
== "" || subitem
.EndsWith ("~"))
// CloseWrite (re)indexes path/subitem; otherwise Delete schedules its removal.
140 if ((type
& Inotify
.EventType
.CloseWrite
) != 0)
141 IndexSingleFeed (Path
.Combine (path
, subitem
));
142 else if ((type
& Inotify
.EventType
.Delete
) != 0)
143 Removefeed_file (Path
.Combine (path
, subitem
));
146 // Modified/Created event using FSW
// Changed/Created handler for the FileSystemWatcher set up in StartWorker.
// Files whose path ends in "~" are skipped: OnInotifyEvent already ignores
// such backup files, and without the same filter here the two notification
// mechanisms would index different sets of files.
//   o    - event sender (unused)
//   args - carries the full path of the file that changed
private void OnChanged (object o, FileSystemEventArgs args)
{
	if (args.FullPath.EndsWith ("~"))
		return;

	IndexSingleFeed (args.FullPath);
}
153 /////////////////////////////////////////////////
// Schedules indexing of one feed file: builds a FeedIndexableGenerator for it,
// wraps the generator in an add task and hands the task to ThisScheduler.
// NOTE(review): the declaration of `task` and any lines that configure it
// between NewAddTask and ThisScheduler.Add are not visible in this listing.
155 private void IndexSingleFeed (string filename
) {
// The scheduler is asked whether it already holds a task tagged with this
// filename; if so only a debug message is logged here (the line that follows
// the log call is not shown, presumably an early return -- confirm).
156 if (ThisScheduler
.ContainsByTag (filename
)) {
157 Logger
.Log
.Debug ("Not adding task for already running task: {0}", filename
);
161 FeedIndexableGenerator generator
= new FeedIndexableGenerator (this, filename
);
163 task
= NewAddTask (generator
);
166 ThisScheduler
.Add (task
);
// Schedules removal of a feed file from the index: the path is converted to a
// file URI, wrapped in a remove task with Immediate priority and sub-priority
// 0, and queued on ThisScheduler.
private void Removefeed_file (string file)
{
	Logger.Log.Debug ("Removing Liferea feed_file:" + file);

	Scheduler.Task removal = NewRemoveTask (UriFu.PathToFileUri (file));
	removal.Priority = Scheduler.Priority.Immediate;
	removal.SubPriority = 0;

	ThisScheduler.Add (removal);
}
182 * Indexable generator for Liferea Feeds
// IIndexableGenerator implementation that reads one Liferea feed file and
// produces one Indexable per feed item.
184 public class FeedIndexableGenerator
: IIndexableGenerator
{
// Path of the feed file being read.
185 private string feed_file
;
// Owning queryable; supplies the shared serializer and FileAttributesStore.
186 private LifereaQueryable queryable
;
// XML reader over feed_file, created in ReadFeedHeader.
188 private XmlTextReader reader
;
// Cleared when the file is up to date, fails to parse, or has no more items.
189 private bool is_valid_file
= true;
// Text of the <feedSource> and <feedTitle> header elements.
191 private string feed_source
= "";
192 private string publisher
= "";
// Item most recently deserialized by HasNextIndexable.
193 private Item current_item
;
194 private XmlSerializer serializer
;
// Stores the owning queryable and the feed path, and takes the Item
// serializer from the queryable.
// NOTE(review): original line 201 is not visible in this listing; ReadFeedHeader
// is not called from any visible line, so it may be invoked there -- confirm.
196 public FeedIndexableGenerator (LifereaQueryable queryable
, string feed_file
)
198 this.queryable
= queryable
;
199 this.feed_file
= feed_file
;
200 this.serializer
= queryable
.Serializer
;
// PostFlushHook member of the generator. The only visible body line is the
// commented-out AttachLastWriteTime call below.
// NOTE(review): original line 206 is not visible in this listing, so the
// method may contain more than is shown -- confirm.
204 public void PostFlushHook ()
207 //queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
// Status name of this generator: the path of the feed file it reads.
public string StatusName {
	get {
		return this.feed_file;
	}
}
// Asks the owning queryable's FileAttributesStore whether `path` is up to
// date and returns its answer unchanged.
private bool IsUpToDate (string path)
{
	bool up_to_date = queryable.FileAttributesStore.IsUpToDate (path);
	return up_to_date;
}
// Opens the feed file and consumes its header elements, recording the feed
// source and title, until the reader reaches the first <item>.
// NOTE(review): the try/do/case/break lines are not visible in this listing;
// the comments below describe only the statements that are shown.
219 private void ReadFeedHeader () {
// A file that the attributes store reports as up to date is marked invalid,
// which makes HasNextIndexable produce no items for it.
221 if (IsUpToDate (feed_file
)) {
222 is_valid_file
= false;
226 Logger
.Log
.Debug ("Opening liferea feed file: {0}", feed_file
);
227 reader
= new XmlTextReader (feed_file
);
228 reader
.WhitespaceHandling
= WhitespaceHandling
.None
;
230 is_valid_file
= true;
232 // move to beginning of document
233 reader
.MoveToContent();
234 // move to <feed> node
235 reader
.ReadStartElement ("feed");
// Header loop: inspect the current element name; "item" is tested separately
// before the switch (presumably to leave the loop with the reader positioned
// on the first item -- confirm).
238 string elementName
= reader
.Name
;
239 if (elementName
== "item")
241 switch (elementName
) {
// <feedSource> text is stored in feed_source.
243 reader
.ReadStartElement ("feedSource");
244 feed_source
= reader
.ReadString ();
245 reader
.ReadEndElement ();
// <feedTitle> text is stored in publisher.
248 reader
.ReadStartElement ("feedTitle");
249 publisher
= reader
.ReadString ();
250 reader
.ReadEndElement ();
252 // ignore other elements
254 reader
.ReadOuterXml ();
257 } while (!reader
.EOF
&& reader
.NodeType
== XmlNodeType
.Element
);
// XML errors are logged at debug level and mark the file invalid.
258 } catch (XmlException ex
) {
259 Logger
.Log
.Debug ("Invalid feed file: " + ex
.Message
);
260 is_valid_file
= false;
// Tries to read the next <item> from the feed into current_item and returns
// is_valid_file, which is cleared once no item could be produced.
// NOTE(review): original lines 267, 286 and 288 are not visible in this
// listing (current_item is presumably reset and the reader closed somewhere
// in them) -- confirm against the full file.
265 public bool HasNextIndexable ()
// Nothing to do for an invalid file or when no reader has been opened.
268 if (!is_valid_file
|| reader
== null)
270 string itemString
= "";
272 // check if the reader is at the startnode
273 if (reader
.NodeType
== XmlNodeType
.Element
) {
274 itemString
= reader
.ReadOuterXml ();
275 // form node object from the <node>...</node> string
276 // FIXME Deserialize(...) is expensive - remove it altogether
277 current_item
= (Item
) serializer
.Deserialize (new StringReader (itemString
));
// NOTE(review): XmlSerializer.Deserialize is documented to report malformed
// input as InvalidOperationException (with the XML error as InnerException),
// which this XmlException handler would not catch -- confirm and widen the
// handler if needed.
279 } catch (XmlException ex
) {
280 // probably no more <item>
// No item was produced: mark the file as finished.
283 if (current_item
== null) {
284 //Logger.Log.Debug ("LifereaQ: Probably no more feeds left in " + feed_file);
285 //Logger.Log.Debug ("Causing string = " + itemString);
287 is_valid_file
= false;
290 return is_valid_file
;
// Returns the Indexable built from current_item when an item is pending.
// NOTE(review): the branch taken when current_item is null is not visible in
// this listing (presumably it returns null) -- confirm.
293 public Indexable
GetNextIndexable ()
295 if (current_item
!= null)
296 return current_itemToIndexable ();
// Converts current_item (the feed item most recently deserialized by
// HasNextIndexable) into an Indexable.
//
// URI:        "<feed_source>;item=<item source>"; when that string is not a
//             valid URI, the same text is placed behind "liferea://dummy?".
// Parent:     file URI of the feed file.
// Timestamp:  current_item.Timestamp added as seconds to 1970-01-01.
// Properties: dc:title, dc:creator (one per "author" attribute), dc:date,
//             dc:identifier, dc:source, dc:publisher.
// Text:       the item description.
//
// Returns the populated Indexable.
private Indexable current_itemToIndexable ()
{
	Indexable indexable;
	try {
		indexable = new Indexable (new Uri (String.Format ("{0};item={1}", feed_source, current_item.Source)));
	} catch (System.UriFormatException) {
		indexable = new Indexable (new Uri (String.Format ("liferea://dummy?{0};item={1}", feed_source, current_item.Source)));
	}
	indexable.ParentUri = UriFu.PathToFileUri (feed_file);
	indexable.MimeType = "text/html";
	indexable.HitType = "FeedItem";

	DateTime date = new DateTime (1970, 1, 1);
	date = date.AddSeconds (current_item.Timestamp);
	indexable.Timestamp = date;

	// cleaning up the property names as far as possible
	// this way querying for specific field is possible
	// following DC element names wherever applicable

	indexable.AddProperty (Property.New ("dc:title", current_item.Title));

	// Item.Attribs has no initializer, so it is still null for an <item>
	// that carries no <attributes> child; guard the dereference instead of
	// failing the whole feed with a NullReferenceException.
	Attribute[] attribs = null;
	if (current_item.Attribs != null)
		attribs = current_item.Attribs.AttribArray;

	if (attribs != null) {
		foreach (Attribute attrib in attribs) {
			// Only "author" attributes are indexed.
			if (attrib.Name != "author")
				continue;
			indexable.AddProperty (Property.New ("dc:creator", attrib.Value));
		}
	}
	indexable.AddProperty (Property.NewDate ("dc:date", date));
	indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Source));
	indexable.AddProperty (Property.NewKeyword ("dc:source", feed_source));
	indexable.AddProperty (Property.New ("dc:publisher", publisher));

	StringReader text_reader = new StringReader (current_item.Description);
	indexable.SetTextReader (text_reader);

	return indexable;
}
// Serialization mapping for one feed <item> element (root and type name
// "item", empty namespace).
// NOTE(review): the class declaration line itself is not visible in this
// listing; the members below are the ones mapped from child elements.
342 [System
.Xml
.Serialization
.XmlRoot("item", Namespace
="", IsNullable
=false)]
343 [System
.Xml
.Serialization
.XmlType("item", Namespace
="")]
// <title>, <description> and <source> default to the empty string.
345 [XmlElement ("title")] public string Title
= "";
346 [XmlElement ("description")] public string Description
="";
347 [XmlElement ("source")] public string Source
="";
// <attributes> wrapper; declared without an initializer, so it starts out null.
348 [XmlElement ("attributes")] public Attributes Attribs
;
// <time>; current_itemToIndexable adds it as seconds to 1970-01-01.
349 [XmlElement ("time")] public long Timestamp
;
// Container mapped from an item's <attributes> element: collects its
// <attribute> children into an array (declared without an initializer).
352 public class Attributes
{
353 [XmlElement ("attribute")] public Attribute
[] AttribArray
;
// One <attribute> element: Name comes from its "name" XML attribute and Value
// from its text content. Both default to the empty string.
356 public class Attribute
{
357 [XmlAttribute ("name")] public string Name
= "";
358 [XmlTextAttribute
] public string Value
= "";