Indexable is not marked _done_ until all the child indexables (including child of...
[beagle.git] / beagled / BlamQueryable / BlamQueryable.cs
blobfb54e2c043153f5c0bc3ccef62828dcfe0649117
1 //
2 // BlamQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Collections;
30 using System.Threading;
32 using System.Xml;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.BlamQueryable {
/// <summary>
/// Backend that indexes the items found in the Blam feed reader's
/// collection file. The file is looked up as
/// <c>.gnome2/blam/collection.xml</c> below <c>PathFinder.HomeDir</c> and is
/// re-indexed whenever a change to it is reported.
/// </summary>
[QueryableFlavor (Name="Blam", Domain=QueryDomain.Local, RequireInotify=false)]
public class BlamQueryable : LuceneFileQueryable {

	private static Logger log = Logger.Get ("BlamQueryable");

	// Interval, in milliseconds, between checks for the Blam directory
	// while it does not exist yet.
	private const uint EXISTENCE_POLL_MS = 60000;

	string blam_dir;
	FileInfo blam_file;

	// Fallback watcher used when inotify is not available. It is kept in a
	// field (instead of a local) so that it stays referenced for as long as
	// this backend lives and keeps delivering events.
	FileSystemWatcher blam_watcher;

	// add versioning
	// v1: changed property names to match DC element names
	// v2: remove dc:date, use Timestamp property.
	private const int INDEX_VERSION = 2;

	/// <summary>
	/// Creates the backend on top of the "BlamIndex" index and computes the
	/// location of Blam's directory and collection file.
	/// </summary>
	public BlamQueryable () : base ("BlamIndex", INDEX_VERSION)
	{
		blam_dir = Path.Combine (Path.Combine (PathFinder.HomeDir, ".gnome2"), "blam");
		blam_file = new FileInfo (Path.Combine (blam_dir, "collection.xml"));
	}

	/////////////////////////////////////////////////

	/// <summary>
	/// Starts the base backend and then performs the Blam-specific setup on
	/// a separate thread.
	/// </summary>
	public override void Start ()
	{
		base.Start ();

		ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
	}

	/// <summary>
	/// Sets up change notification for the collection file and schedules an
	/// initial indexing pass. If the Blam directory does not exist yet, a
	/// periodic existence check is installed instead.
	/// </summary>
	private void StartWorker ()
	{
		if (!Directory.Exists (blam_dir)) {
			GLib.Timeout.Add (EXISTENCE_POLL_MS, new GLib.TimeoutHandler (CheckForExistence));
			return;
		}

		if (Inotify.Enabled) {
			Inotify.EventType mask = Inotify.EventType.CloseWrite;
			Inotify.Subscribe (blam_dir, OnInotifyEvent, mask);
		} else {
			blam_watcher = new FileSystemWatcher ();
			blam_watcher.Path = blam_dir;
			blam_watcher.Filter = blam_file.Name;

			blam_watcher.Changed += new FileSystemEventHandler (OnChangedEvent);
			blam_watcher.Created += new FileSystemEventHandler (OnChangedEvent);

			blam_watcher.EnableRaisingEvents = true;
		}

		if (File.Exists (blam_file.FullName))
			Index ();
	}

	/// <summary>
	/// Timeout callback used while the Blam directory is missing.
	/// </summary>
	/// <returns>
	/// <c>true</c> to be called again while the directory is still missing;
	/// <c>false</c> once it exists and the backend has been (re)started.
	/// </returns>
	private bool CheckForExistence ()
	{
		if (!Directory.Exists (blam_dir))
			return true;

		// NOTE(review): this also runs base.Start () a second time; confirm
		// that the base class tolerates repeated Start () calls.
		this.Start ();

		return false;
	}

	/////////////////////////////////////////////////

	// Modified event using Inotify
	private void OnInotifyEvent (Inotify.Watch watch,
				     string path,
				     string subitem,
				     string srcpath,
				     Inotify.EventType type)
	{
		// The watch is on the whole directory; only the collection file
		// is of interest.
		if (subitem != blam_file.Name)
			return;

		Index ();
	}

	// Modified/Created event using FSW
	private void OnChangedEvent (object o, FileSystemEventArgs args)
	{
		Index ();
	}

	/////////////////////////////////////////////////

	/// <summary>
	/// Schedules a task that indexes the collection file, unless a task
	/// tagged "Blam" is already known to the scheduler.
	/// </summary>
	private void Index ()
	{
		if (ThisScheduler.ContainsByTag ("Blam")) {
			Logger.Log.Debug ("Not adding task for already running Blam task");
			return;
		}

		ItemIndexableGenerator generator = new ItemIndexableGenerator (this, blam_dir, blam_file.FullName);
		Scheduler.Task task;
		task = NewAddTask (generator);
		task.Tag = "Blam";
		ThisScheduler.Add (task);
	}
}
/**
 * Indexable generator for Blam Feeds
 *
 * Walks the Blam collection file with a forward-only XML reader and hands
 * out one Indexable per <Item> element. Callers are expected to alternate
 * HasNextIndexable () and GetNextIndexable ().
 */
public class ItemIndexableGenerator : IIndexableGenerator {
	private string feed_file;
	private string blam_dir;
	private BlamQueryable queryable;
	private int indexed_count;

	private XmlTextReader reader;
	private bool is_valid_file = true;

	// Attributes of the most recently seen <Channel> element; they apply
	// to all <Item> elements that follow it.
	private string channel_url, channel_name;

	/// <summary>
	/// Creates a generator for <paramref name="feed_file"/> and immediately
	/// opens it and positions the reader inside the collection element.
	/// </summary>
	/// <param name="queryable">Owning backend; used for the up-to-date check.</param>
	/// <param name="blam_dir">Blam's directory; used to build cached image paths.</param>
	/// <param name="feed_file">Full path of the collection file to read.</param>
	public ItemIndexableGenerator (BlamQueryable queryable, string blam_dir, string feed_file)
	{
		this.blam_dir = blam_dir;
		this.queryable = queryable;
		this.feed_file = feed_file;
		ReadFeedHeader ();
	}

	/// <summary>Currently a no-op; the attribute store update is disabled.</summary>
	public void PostFlushHook ()
	{
		//queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
	}

	/// <summary>Name reported for this generator: the collection file path.</summary>
	public string StatusName {
		get { return feed_file; }
	}

	private bool IsUpToDate (string path)
	{
		return queryable.FileAttributesStore.IsUpToDate (path);
	}

	// Closes the reader if there is one and forgets it, so that later
	// calls see "no reader" instead of touching a closed one.
	private void CloseReader ()
	{
		if (reader != null) {
			reader.Close ();
			reader = null;
		}
	}

	// Moves the reader to the next node. Returns false when there is
	// nothing more to read; a parse error is logged, ends the iteration
	// and is reported as false as well.
	private bool AdvanceReader ()
	{
		try {
			return reader.Read ();
		} catch (XmlException ex) {
			Logger.Log.Warn (ex, "Caught exception parsing feed file:");
			is_valid_file = false;
			CloseReader ();
			return false;
		}
	}

	/// <summary>
	/// Opens the collection file and consumes the opening
	/// <c>ChannelCollection</c> element. The file is marked invalid (so that
	/// no indexables are produced) when it is already up to date or when it
	/// cannot be parsed.
	/// </summary>
	private void ReadFeedHeader ()
	{
		if (IsUpToDate (feed_file)) {
			is_valid_file = false;
			return;
		}

		try {
			Logger.Log.Debug ("Opening blam collection file: {0}", feed_file);
			reader = new XmlTextReader (feed_file);
			reader.WhitespaceHandling = WhitespaceHandling.None;

			is_valid_file = true;
			// move to beginning of document
			reader.MoveToContent ();
			// move to <ChannelCollection> node
			reader.ReadStartElement ("ChannelCollection");
			channel_name = null;
			channel_url = null;
		} catch (XmlException ex) {
			Logger.Log.Warn (ex, "Caught exception parsing feed file:");
			is_valid_file = false;
			// The reader may not have been assigned if construction failed.
			CloseReader ();
		}
	}

	/// <summary>
	/// Advances to the next <c>Item</c> element, remembering the name and URL
	/// of any <c>Channel</c> element passed on the way.
	/// </summary>
	/// <returns>
	/// <c>true</c> when the reader is positioned on an <c>Item</c> element;
	/// <c>false</c> when the file is invalid or exhausted, in which case the
	/// reader has been closed.
	/// </returns>
	public bool HasNextIndexable ()
	{
		if (!is_valid_file || reader == null)
			return false;

		// The current node has not been examined yet (the header reader and
		// GetNextIndexable () both leave the reader on a fresh node), so
		// inspect first and advance afterwards.
		do {
			if (reader.NodeType != XmlNodeType.Element)
				continue;

			if (reader.Name == "Item")
				return true;

			// Assuming the structure of tags is flat i.e.
			// <channel> (<item>...</item>)* </channel>
			// and <channel> tags are not nested
			// If later the file format changes,
			// and channel tags become nested, need to make sure
			// that when a nested channel ends, channel_name,
			// channel_url are reset to the parent values
			if (reader.Name == "Channel") {
				channel_name = reader.GetAttribute ("Name");
				channel_url = reader.GetAttribute ("Url");
			}
		} while (AdvanceReader ());

		CloseReader ();
		return false;
	}

	/// <summary>
	/// Builds the indexable for the <c>Item</c> element the reader is
	/// currently positioned on and moves the reader past it. Must only be
	/// called after <see cref="HasNextIndexable"/> returned <c>true</c>.
	/// </summary>
	/// <returns>An indexable of hit type "FeedItem" describing the item.</returns>
	public Indexable GetNextIndexable ()
	{
		string id = reader.GetAttribute ("Id");
		string title = reader.GetAttribute ("Title");
		string author = reader.GetAttribute ("Author");
		// FIXME stupid mono bug; DateTime.ParseExact ("0001-01-01T00:00:00.0000000+00:00", ...)
		// http://bugzilla.ximian.com/show_bug.cgi?id=76082
		// Still present in 1.1.9.2
		DateTime pub_date;
		try {
			// The invariant culture is used because ':' in the format
			// stands for the culture's time separator; with the current
			// culture the fixed-format timestamp would fail to parse in
			// locales whose separator is not ':'.
			pub_date = DateTime.ParseExact (
				reader.GetAttribute ("PubDate"),
				"yyyy-MM-ddTHH:mm:ss.fffffffzzz",
				System.Globalization.CultureInfo.InvariantCulture);
		} catch (Exception) {
			// Missing or unparsable date (or the Mono bug above).
			pub_date = DateTime.MinValue;
		}
		string link = reader.GetAttribute ("Link");
		string text = reader.GetAttribute ("Text");
		// An item without a Text attribute is indexed with empty content
		// rather than failing on a null string below.
		if (text == null)
			text = String.Empty;
		AdvanceReader ();

		Uri uri = new Uri (String.Format ("feed:{0};item={1}", channel_url, id));
		Logger.Log.Debug ("BlamQ: Indexing [" + channel_name + "] " + title);

		Indexable indexable = new Indexable (uri);
		indexable.ParentUri = UriFu.PathToFileUri (feed_file);
		indexable.MimeType = "text/html";
		indexable.HitType = "FeedItem";
		indexable.Timestamp = pub_date.ToUniversalTime ();

		// change property names to DC names, as far as allowed
		indexable.AddProperty (Property.New ("dc:title", title));
		indexable.AddProperty (Property.New ("dc:creator", author));
		indexable.AddProperty (Property.NewKeyword ("dc:identifier", link));
		indexable.AddProperty (Property.NewKeyword ("dc:source", channel_url));
		indexable.AddProperty (Property.New ("dc:publisher", channel_name));

		string img = FindFirstImageSource (text);
		if (img != null) {
			// NOTE(review): presumably this mirrors how Blam names the
			// files in its Cache directory - confirm against Blam.
			string path = Path.Combine (Path.Combine (blam_dir, "Cache"),
						    img.GetHashCode ().ToString ());
			indexable.AddProperty (Property.NewUnsearched ("fixme:cachedimg", path));
		}

		StringReader string_reader = new StringReader (text);
		indexable.SetTextReader (string_reader);

		return indexable;
	}

	// Returns the value of the first  <img src="...">  occurrence in the
	// given markup, or null when there is none or its quote is unterminated.
	private static string FindFirstImageSource (string html)
	{
		const string marker = "<img src=\"";

		int start = html.IndexOf (marker);
		if (start == -1)
			return null;

		start += marker.Length;
		int end = html.IndexOf ("\"", start);
		if (end == -1)
			return null;

		return html.Substring (start, end - start);
	}
}