2 // AkregatorQueryable.cs
4 // Copyright (C) 2005 Debajyoti Bera
7 // Permission is hereby granted, free of charge, to any person obtaining a
8 // copy of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the
12 // Software is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included in
15 // all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Threading
;
33 using System
.Xml
.Serialization
;
38 namespace Beagle
.Daemon
.AkregatorQueryable
{
40 [QueryableFlavor (Name
="Akregator", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
41 public class AkregatorQueryable
: LuceneFileQueryable
, IIndexableGenerator
{
// Shared logger for this backend, obtained from Logger.Get under the name "AkregatorQueryable".
43 private static Logger log
= Logger
.Get ("AkregatorQueryable");
// Constructor. Passes the index name "AkregatorIndex" to the base class and
// builds akregator_dir one path component at a time, ending up with
// <PathFinder.HomeDir>/.kde/share/apps/akregator/Archive.
// NOTE(review): the declaration of akregator_dir and the method braces are not
// visible in this excerpt.
47 public AkregatorQueryable () : base ("AkregatorIndex")
49 akregator_dir
= Path
.Combine (PathFinder
.HomeDir
, ".kde");
50 akregator_dir
= Path
.Combine (akregator_dir
, "share");
51 akregator_dir
= Path
.Combine (akregator_dir
, "apps");
52 akregator_dir
= Path
.Combine (akregator_dir
, "akregator");
53 akregator_dir
= Path
.Combine (akregator_dir
, "Archive");
56 /////////////////////////////////////////////////
// Start override. Hands StartWorker (wrapped in a ThreadStart delegate) to
// ExceptionHandlingThread.Start, presumably so the initial scan runs off the
// calling thread - confirm against ExceptionHandlingThread.
// NOTE(review): other statements of this method, if any, are not visible here.
58 public override void Start ()
62 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Worker started from Start (). Sets up change notification on akregator_dir
// and schedules the initial crawl of every file in that directory.
65 private void StartWorker ()
// Archive directory missing: register CheckForExistence as a GLib timeout
// handler with an interval of 60000 (presumably milliseconds - confirm against
// the GLib.Timeout documentation).
// NOTE(review): the end of this branch (presumably an early return) is not
// visible in this excerpt.
67 if (!Directory
.Exists (akregator_dir
)) {
68 GLib
.Timeout
.Add (60000, new GLib
.TimeoutHandler (CheckForExistence
));
// Inotify available: subscribe OnInotifyEvent to akregator_dir with a
// CloseWrite mask.
72 if (Inotify
.Enabled
) {
73 Inotify
.EventType mask
= Inotify
.EventType
.CloseWrite
;
75 Inotify
.Subscribe (akregator_dir
, OnInotifyEvent
, mask
);
// FileSystemWatcher set-up: OnChanged handles both Changed and Created events.
// NOTE(review): presumably the fallback when Inotify is disabled; the "else"
// joining the two branches is not visible in this excerpt.
77 FileSystemWatcher fsw
= new FileSystemWatcher ();
78 fsw
.Path
= akregator_dir
;
80 fsw
.Changed
+= new FileSystemEventHandler (OnChanged
);
81 fsw
.Created
+= new FileSystemEventHandler (OnChanged
);
83 fsw
.EnableRaisingEvents
= true;
86 log
.Info ("Scanning Akregator feeds...");
// NOTE(review): no Start/Stop call on the stopwatch is visible in this excerpt,
// and feed_count / item_count are not referenced by any visible statement -
// confirm whether they are still needed.
88 Stopwatch stopwatch
= new Stopwatch ();
89 int feed_count
= 0, item_count
= 0;
// Record every file currently in the archive directory; HasNextIndexable and
// GetNextIndexable walk this collection later.
93 DirectoryInfo dir
= new DirectoryInfo (akregator_dir
);
94 this.files_to_parse
= dir
.GetFiles ();
// This object is itself passed to NewAddTask (the class implements
// IIndexableGenerator); the task is tagged "Akregator" and queued.
95 Scheduler
.Task task
= NewAddTask (this);
96 task
.Tag
= "Akregator";
97 ThisScheduler
.Add (task
);
100 log
.Info ("{0} files will be parsed (scanned in {1})", this.files_to_parse
.Count
, stopwatch
);
// Timeout callback registered by StartWorker when the archive directory is
// missing. Tests whether akregator_dir exists.
// NOTE(review): the statements that follow the check, including the bool
// results returned to GLib, are not visible in this excerpt.
103 private bool CheckForExistence ()
105 if (!Directory
.Exists (akregator_dir
))
113 /////////////////////////////////////////////////
115 // Modified/Created event using Inotify
// Inotify callback subscribed in StartWorker. Indexes the file at
// Path.Combine (path, subitem) with Scheduler.Priority.Immediate.
// NOTE(review): the parameter declarations for path and subitem (and any others
// between watch and type) are not visible in this excerpt, nor are any
// statements preceding the IndexSingleFeed call.
117 private void OnInotifyEvent (Inotify
.Watch watch
,
121 Inotify
.EventType type
)
126 IndexSingleFeed (Path
.Combine (path
, subitem
), Scheduler
.Priority
.Immediate
);
129 // Modified/Created event using FSW
// FileSystemWatcher handler attached to both the Changed and Created events in
// StartWorker. Indexes the affected file (args.FullPath) with
// Scheduler.Priority.Immediate. The sender argument o is not used.
131 private void OnChanged (object o
, FileSystemEventArgs args
)
133 IndexSingleFeed (args
.FullPath
, Scheduler
.Priority
.Immediate
);
136 /////////////////////////////////////////////////
// Scans item.MetaList for a MetaInfo entry whose Type is "deleted" and whose
// value is "true". Callers skip the item when this method returns true, so such
// an entry presumably marks the item as deleted.
// The channel argument is not referenced by any visible statement.
// NOTE(review): the return statements are not visible in this excerpt.
138 private bool IsFeedDeleted (Channel channel
, Item item
)
140 for (int i
=0; i
<item
.MetaList
.Count
; ++i
) {
// MetaList is an untyped ArrayList, hence the cast.
141 MetaInfo meta
= (MetaInfo
)item
.MetaList
[i
];
142 if (meta
.Type
== "deleted" && meta
.value == "true") {
// Builds the Indexable for one feed item.
//   channel - feed channel the item belongs to; supplies the weblog link.
//   item    - the feed item; supplies link, title, publication date and text.
//   file    - archive file the item was read from; becomes the parent URI.
// NOTE(review): the return statement is not visible in this excerpt.
149 private Indexable
FeedItemToIndexable (Channel channel
, Item item
, FileInfo file
)
// The item URI combines the channel link and the item link.
151 Indexable indexable
= new Indexable (new Uri (String
.Format ("feed:{0};item={1}", channel
.Link
, item
.Link
)));
152 indexable
.ParentUri
= UriFu
.PathToFileUri (file
.FullName
);
153 indexable
.MimeType
= "text/html";
154 indexable
.Type
= "FeedItem";
// NOTE(review): Item.PubDate has no default initializer (unlike the other Item
// string fields), so it may be null here - confirm HeaderDecodeDate copes.
156 int offset
; // offset is ignored - only the time on the current machine is stored
157 DateTime date
= GMime
.Utils
.HeaderDecodeDate (item
.PubDate
, out offset
);
159 indexable
.Timestamp
= date
;
161 indexable
.AddProperty (Property
.New ("dc:title", item
.Title
));
162 indexable
.AddProperty (Property
.NewDate ("fixme:published", date
));
163 indexable
.AddProperty (Property
.NewKeyword ("fixme:itemuri", item
.Link
));
164 indexable
.AddProperty (Property
.NewKeyword ("fixme:webloguri", channel
.Link
));
// The item description is the text content handed to the indexer.
166 StringReader reader
= new StringReader (item
.Description
);
167 indexable
.SetTextReader (reader
);
171 // Parse and index a single feed
// Parses one feed archive file and schedules an add task for each of its items
// that is not flagged as deleted.
//   filename - path of the feed file to parse.
//   priority - scheduler priority given to every task created here.
// NOTE(review): the int return values, the declaration of feed, and the
// statements guarded by the if-checks below are not visible in this excerpt.
173 private int IndexSingleFeed (string filename
, Scheduler
.Priority priority
)
175 FileInfo file
= new FileInfo(filename
);
// Consult IsUpToDate before parsing (the guarded statement is not visible;
// presumably an early exit for unchanged files).
180 if (IsUpToDate (file
.FullName
))
183 feed
= RSS
.LoadFromFile(file
.FullName
);
// Guard against a failed parse or a feed without channel/items.
185 if(feed
== null || feed
.channel
== null || feed
.channel
.Items
== null)
188 foreach (Item item
in feed
.channel
.Items
) {
// Deleted items are presumably skipped; the guarded statement is not visible.
189 if (IsFeedDeleted (feed
.channel
, item
))
194 Indexable indexable
= FeedItemToIndexable (feed
.channel
, item
, file
);
// One scheduler task per item, at the caller's priority and sub-priority 0.
196 Scheduler
.Task task
= NewAddTask (indexable
);
197 task
.Priority
= priority
;
198 task
.SubPriority
= 0;
199 ThisScheduler
.Add (task
);
206 ////////////////////////////////////////////////
208 // IIndexableGenerator implementation
// Iteration state shared by HasNextIndexable and GetNextIndexable.
// files_to_parse: files found in akregator_dir, assigned in StartWorker.
210 private ICollection files_to_parse
;
// Cursor over files_to_parse; created on first use in HasNextIndexable.
211 private IEnumerator file_enumerator
= null;
// Cursor over the items of current_feed.
212 private IEnumerator item_enumerator
= null;
// Feed parsed from the file that file_enumerator currently points at.
213 private RSS current_feed
;
// Converts the item at the current enumerator position into an Indexable.
// Relies on HasNextIndexable having positioned item_enumerator and
// file_enumerator beforehand.
// NOTE(review): the statement guarded by the IsFeedDeleted check is not visible
// in this excerpt; per the FIXME below it returns null for deleted items.
215 public Indexable
GetNextIndexable ()
217 Item item
= (Item
) this.item_enumerator
.Current
;
218 FileInfo file
= (FileInfo
) this.file_enumerator
.Current
;
219 // FIXME: We should find the next valid feed item and return that;
220 // that would avoid wasting function calls,
221 // but it would also need HasNextIndexable to be adapted.
222 // Right now we return null, as LuceneQueryable can handle null.
223 if (IsFeedDeleted (this.current_feed
.channel
, item
))
226 return FeedItemToIndexable (this.current_feed
.channel
, item
, file
);
// Advances the iteration state to the next feed item, moving on to the next
// file in files_to_parse whenever the current feed has no more items.
// NOTE(review): the return statements, the "do" that opens the inner loop, and
// the statements guarded by the if-checks are not visible in this excerpt.
229 public bool HasNextIndexable ()
// Nothing was found in the archive directory.
231 if (this.files_to_parse
.Count
== 0)
// Loop while there is no item enumerator yet, or the current one is exhausted.
234 while (this.item_enumerator
== null || !this.item_enumerator
.MoveNext ()) {
// Create the file cursor on first use.
235 if (this.file_enumerator
== null)
236 this.file_enumerator
= this.files_to_parse
.GetEnumerator ();
// No more files to move to.
239 if (!this.file_enumerator
.MoveNext ())
242 FileInfo file
= (FileInfo
) this.file_enumerator
.Current
;
// Consult IsUpToDate before parsing (guarded statement not visible).
244 if (IsUpToDate (file
.FullName
))
247 RSS feed
= RSS
.LoadFromFile (file
.FullName
);
// Guard against a failed parse or a feed without channel/items.
249 if (feed
== null || feed
.channel
== null || feed
.channel
.Items
== null)
252 this.current_feed
= feed
;
// Keep trying files until one yields a usable feed.
254 } while (this.current_feed
== null);
// Start iterating the items of the newly loaded feed.
256 this.item_enumerator
= this.current_feed
.channel
.Items
.GetEnumerator ();
262 public string StatusName
{
268 ////////////////////////////////////////////////
270 // De-serialization classes
271 // Changing to standard stream parsing would no doubt increase performance,
272 // but it is not clear whether the difference would be noticeable
// De-serialization target for a <meta> element of a feed item (Item.MetaList is
// declared with typeof (MetaInfo)). IsFeedDeleted reads Type and value.
// NOTE(review): any attribute on the value field and the closing brace are not
// visible in this excerpt.
274 public class MetaInfo
{
// Value compared against "true" in IsFeedDeleted; defaults to the empty string.
276 public string value = "";
// Populated from the "type" XML attribute; defaults to the empty string.
277 [XmlAttribute ("type")] public string Type
= "";
// Members de-serialized from one feed <item>. Elsewhere in this file they are
// accessed through variables of type Item.
// NOTE(review): the enclosing class header is not visible in this excerpt.
// PubDate is the only string member without a default value, so it is null when
// the <pubDate> element is absent.
281 [XmlElement ("pubDate")] public string PubDate
;
282 [XmlElement ("title")] public string Title
= "";
283 [XmlElement ("description")] public string Description
="";
284 [XmlElement ("link")] public string Link
="";
// <meta> children in the "http://foobar" namespace, each de-serialized as a
// MetaInfo; backed by metaList, which starts out as an empty list.
285 [XmlElement ("meta", typeof (MetaInfo
), Namespace
="http://foobar")]
286 public ArrayList MetaList
{
287 get { return metaList; }
288 set { metaList = value; }
290 private ArrayList metaList
= new ArrayList ();
// De-serialization target for the <channel> element of a feed: title, link,
// description and the list of items.
// NOTE(review): the closing braces are not visible in this excerpt.
293 public class Channel
{
294 [XmlElement ("title")] public string Title
="";
295 [XmlElement ("link")] public string Link
="";
296 [XmlElement ("description")] public string Description
="";
// Every <item> child is de-serialized as an Item; backed by mItems, which
// starts out as an empty list.
299 [XmlElement ("item", typeof (Item
))]
300 public ArrayList Items
{
301 get { return mItems; }
302 set { mItems = value; }
304 private ArrayList mItems
= new ArrayList ();
// The feed's <channel> element, de-serialized as a Channel. It has no
// initializer, which is why callers check it against null after
// RSS.LoadFromFile.
// NOTE(review): the enclosing class header (presumably RSS) is not visible in
// this excerpt.
308 [XmlElement ("channel", typeof (Channel
))]
309 public Channel channel
;
311 public static RSS
LoadFromFile (string filename
) {
313 XmlRootAttribute xRoot
= new XmlRootAttribute();
314 xRoot
.ElementName
= "rss";
316 XmlSerializer serializer
= new XmlSerializer (typeof (RSS
), xRoot
);
317 Stream stream
= new FileStream (filename
,
321 XmlTextReader reader
= new XmlTextReader (stream
);
323 if (!serializer
.CanDeserialize(reader
) )
324 Console
.WriteLine ("Muopp");
325 f
= (RSS
) serializer
.Deserialize (reader
);