4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
30 using System
.Threading
;
37 namespace Beagle
.Daemon
.BlamQueryable
{
// Flavor registration: Name is "Blam", Domain is QueryDomain.Local and
// RequireInotify is false.
39 [QueryableFlavor (Name
="Blam", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
// Queryable backend for Blam feed data; derives from LuceneFileQueryable.
40 public class BlamQueryable
: LuceneFileQueryable
{
// Logger obtained under the name "BlamQueryable".
// NOTE(review): none of the visible code reads this field (the visible
// logging calls go through Logger.Log) -- confirm whether it is still needed.
42 private static Logger log
= Logger
.Get ("BlamQueryable");
// Index format version history follows; INDEX_VERSION is handed to the base
// constructor together with the index name.
48 // v1: changed property names to match DC element names
49 // v2: remove dc:date, use Timestamp property.
50 private const int INDEX_VERSION
= 2;
// Constructor. Passes the index name "BlamIndex" and INDEX_VERSION to the
// base class, then resolves the Blam data locations:
//   blam_dir  = <PathFinder.HomeDir>/.gnome2/blam
//   blam_file = FileInfo for <blam_dir>/collection.xml
// NOTE(review): the declarations of blam_dir and blam_file are not visible in
// this extract.
52 public BlamQueryable () : base ("BlamIndex", INDEX_VERSION
)
54 blam_dir
= Path
.Combine (Path
.Combine (PathFinder
.HomeDir
, ".gnome2"), "blam");
55 blam_file
= new FileInfo (Path
.Combine (blam_dir
, "collection.xml"));
58 /////////////////////////////////////////////////
// Start override. Hands StartWorker, wrapped in a ThreadStart delegate, to
// ExceptionHandlingThread.Start (presumably this runs StartWorker on its own
// thread -- confirm against ExceptionHandlingThread).
// NOTE(review): the embedded numbering jumps from 60 to 64, so statements
// between the signature and this call may be missing from this extract.
60 public override void Start ()
64 ExceptionHandlingThread
.Start (new ThreadStart (StartWorker
));
// Sets up change notification for the Blam data directory and checks whether
// the collection file already exists.
// NOTE(review): brace, else and return lines are missing from this extract, so
// the exact branch structure below cannot be read off the visible lines.
67 private void StartWorker ()
// Blam directory missing: register CheckForExistence as a GLib timeout
// handler with an interval argument of 60000.
69 if (!Directory
.Exists (blam_dir
)) {
70 GLib
.Timeout
.Add (60000, new GLib
.TimeoutHandler (CheckForExistence
));
// Inotify available: subscribe OnInotifyEvent to CloseWrite events on
// blam_dir.
74 if (Inotify
.Enabled
) {
75 Inotify
.EventType mask
= Inotify
.EventType
.CloseWrite
;
76 Inotify
.Subscribe (blam_dir
, OnInotifyEvent
, mask
);
// FileSystemWatcher path; presumably the fallback taken when inotify is not
// enabled -- the else line is not visible, confirm.
78 FileSystemWatcher fsw
= new FileSystemWatcher ();
// Only watch the collection file's name.
// NOTE(review): no assignment to fsw.Path is visible here (numbering skips
// 79); FileSystemWatcher needs a Path before events can be raised -- verify
// against the full file.
80 fsw
.Filter
= blam_file
.Name
;
// Changed and Created are both routed to OnChangedEvent.
82 fsw
.Changed
+= new FileSystemEventHandler (OnChangedEvent
);
83 fsw
.Created
+= new FileSystemEventHandler (OnChangedEvent
);
85 fsw
.EnableRaisingEvents
= true;
// Collection file already present; the statement guarded by this test is not
// visible in this extract (presumably a call to Index -- confirm).
88 if (File
.Exists (blam_file
.FullName
))
// GLib timeout handler registered by StartWorker when blam_dir is missing.
// Tests again whether blam_dir exists.
// NOTE(review): the statements and return values for both outcomes are not
// visible in this extract; with GLib.TimeoutHandler the bool result decides
// whether the timeout keeps firing -- confirm the values returned.
92 private bool CheckForExistence ()
94 if (!Directory
.Exists (blam_dir
))
102 /////////////////////////////////////////////////
104 // Modified event using Inotify
// Inotify callback subscribed in StartWorker for CloseWrite events on
// blam_dir.
// NOTE(review): only the first and last parameters (watch, type) are visible;
// the numbering skips 106-108, where the remaining parameters -- presumably
// including subitem, which the body reads -- would be declared.
105 private void OnInotifyEvent (Inotify
.Watch watch
,
109 Inotify
.EventType type
)
// Compares the event's subitem with the collection file name; the statements
// for either outcome are not visible in this extract.
111 if (subitem
!= blam_file
.Name
)
117 // Modified/Created event using FSW
// FileSystemWatcher handler; StartWorker attaches it to both the Changed and
// the Created event.
// NOTE(review): the body is not visible in this extract.
118 private void OnChangedEvent (object o
, FileSystemEventArgs args
)
123 /////////////////////////////////////////////////
// Schedules an indexing task for the Blam collection file.
125 private void Index ()
// A task tagged "Blam" is already known to the scheduler: log that no new
// task is added (the early exit itself is on a line not visible here).
127 if (ThisScheduler
.ContainsByTag ("Blam")) {
128 Logger
.Log
.Debug ("Not adding task for already running Blam task");
// Generator that reads items from the collection file.
132 ItemIndexableGenerator generator
= new ItemIndexableGenerator (this, blam_dir
, blam_file
.FullName
);
// Wrap the generator in an add task and hand it to the scheduler.
// NOTE(review): the declaration of task and any assignment of its tag are not
// visible (numbering skips 133 and 135); the ContainsByTag ("Blam") guard
// above only works if the task is tagged "Blam" -- confirm.
134 task
= NewAddTask (generator
);
136 ThisScheduler
.Add (task
);
142 * Indexable generator for Blam Feeds
// IIndexableGenerator implementation that produces one Indexable per Item
// element found in the Blam collection file.
144 public class ItemIndexableGenerator
: IIndexableGenerator
{
// Path of the collection file that is parsed.
145 private string feed_file
;
// Blam data directory; used to build the path of cached images.
146 private string blam_dir
;
// Owning backend; its FileAttributesStore is consulted by IsUpToDate.
147 private BlamQueryable queryable
;
// NOTE(review): not read or written in the visible code -- confirm whether it
// is still used.
148 private int indexed_count
;
// XML reader positioned inside the collection file.
150 private XmlTextReader reader
;
// Cleared when the file is already up to date or fails to parse.
151 private bool is_valid_file
= true;
// Url and Name attributes of the most recently seen Channel element.
153 private string channel_url
, channel_name
;
// Constructor. Stores the owning backend, the Blam directory and the path of
// the collection file.
// NOTE(review): the numbering skips 160-162 after the last assignment, so a
// further statement may be missing from this extract (ReadFeedHeader has no
// visible caller -- confirm whether it is invoked here).
155 public ItemIndexableGenerator (BlamQueryable queryable
, string blam_dir
, string feed_file
)
157 this.blam_dir
= blam_dir
;
158 this.queryable
= queryable
;
159 this.feed_file
= feed_file
;
// Post-flush hook. The only visible content is the commented-out call below,
// which would have attached a last-write time for feed_file; no active
// statement is visible in this extract.
163 public void PostFlushHook ()
165 //queryable.FileAttributesStore.AttachLastWriteTime (feed_file, DateTime.UtcNow);
// Read-only status label for this generator: the path of the collection file
// being processed.
168 public string StatusName
{
169 get { return feed_file; }
// Returns the result of asking the owning backend's FileAttributesStore
// whether the given path is up to date.
172 private bool IsUpToDate (string path
)
174 return queryable
.FileAttributesStore
.IsUpToDate (path
);
// Opens the collection file and positions the reader just inside the
// <ChannelCollection> root element. Sets is_valid_file to reflect whether
// there is anything to read.
177 private void ReadFeedHeader () {
// File unchanged since it was last indexed: mark it as not to be read (the
// early exit is on a line not visible here).
179 if (IsUpToDate (feed_file
)) {
180 is_valid_file
= false;
184 Logger
.Log
.Debug ("Opening blam collection file: {0}", feed_file
);
// Whitespace nodes are not reported by the reader.
185 reader
= new XmlTextReader (feed_file
);
186 reader
.WhitespaceHandling
= WhitespaceHandling
.None
;
188 is_valid_file
= true;
189 // move to beginning of document
190 reader
.MoveToContent();
191 // move to <ChannelCollection> node
192 reader
.ReadStartElement ("ChannelCollection");
// Malformed or unexpected XML: log a warning and mark the file invalid.
// NOTE(review): whether the reader is closed on this path is not visible in
// this extract -- confirm it is not left open.
195 } catch (XmlException ex
) {
196 Logger
.Log
.Warn (ex
, "Caught exception parsing feed file:");
197 is_valid_file
= false;
// Scans forward through the collection file looking for the next Item
// element, recording channel attributes on the way.
// NOTE(review): the return statements, the loop exit and the call that
// advances the reader are on lines missing from this extract.
202 public bool HasNextIndexable ()
// Nothing to read when the file was rejected or never opened.
204 if (!is_valid_file
|| reader
== null)
206 string elementname
= null;
208 while (! reader
.EOF
) {
209 elementname
= reader
.Name
;
// Current node is the start of an Item element (the statement taken in this
// case is not visible; presumably it leaves the loop -- confirm).
211 if (reader
.NodeType
== XmlNodeType
.Element
&&
212 elementname
== "Item" &&
213 reader
.IsStartElement ())
216 // Assuming the structure of tags is flat i.e.
217 // <channel> (<item>...</item>)* </channel>
218 // and <channel> tags are not nested
219 // If later the file format changes,
220 // and channel tags become nested, need to make sure
221 // that when a nested channel ends, channel_name,
222 // channel_url are reset to the parent values
// Current node is a Channel element: remember its Name and Url so that items
// read afterwards can be attributed to it.
223 if (reader
.NodeType
== XmlNodeType
.Element
&&
224 elementname
== "Channel") {
226 channel_name
= reader
.GetAttribute ("Name");
227 channel_url
= reader
.GetAttribute ("Url");
// After the loop: the last node name seen decides the outcome (the branch
// bodies are not visible in this extract).
232 if (elementname
== "Item") {
// Builds an Indexable from the attributes of the node the reader is currently
// on (presumably the Item element located by HasNextIndexable -- confirm).
// NOTE(review): the end of this method, including its return statement, lies
// beyond the visible extract.
241 public Indexable
GetNextIndexable ()
// XmlReader.GetAttribute yields null for an attribute that is not present, so
// every value read below may be null.
243 string id
= reader
.GetAttribute ("Id");
244 string title
= reader
.GetAttribute ("Title");
245 string author
= reader
.GetAttribute ("Author");
246 // FIXME stupid mono bug; DateTime.ParseExact ("0001-01-01T00:00:00.0000000+00:00", ...)
247 // http://bugzilla.ximian.com/show_bug.cgi?id=76082
248 // Still present in 1.1.9.2
// Parse the publication date with a fixed format; any failure falls back to
// DateTime.MinValue. The remaining ParseExact arguments are on a line not
// visible here.
251 pub_date
= DateTime
.ParseExact (
252 reader
.GetAttribute ("PubDate"),
253 "yyyy-MM-ddTHH:mm:ss.fffffffzzz",
// NOTE(review): the caught exception variable is not used in the visible
// lines.
255 } catch (Exception e
) {
256 pub_date
= DateTime
.MinValue
;
258 string link
= reader
.GetAttribute ("Link");
259 string text
= reader
.GetAttribute ("Text");
// Item URI has the form feed:<channel url>;item=<item id>.
262 Uri uri
= new Uri (String
.Format ("feed:{0};item={1}", channel_url
, id
));
263 Logger
.Log
.Debug ("BlamQ: Indexing [" + channel_name
+ "] " + title
);
// The collection file is the parent of every item; the timestamp is the
// publication date converted to UTC.
265 Indexable indexable
= new Indexable (uri
);
266 indexable
.ParentUri
= UriFu
.PathToFileUri (feed_file
);
267 indexable
.MimeType
= "text/html";
268 indexable
.HitType
= "FeedItem";
269 indexable
.Timestamp
= pub_date
.ToUniversalTime ();
271 // change property names to DC names, as far as allowed
272 indexable
.AddProperty (Property
.New ("dc:title", title
));
273 indexable
.AddProperty (Property
.New ("dc:creator", author
));
274 indexable
.AddProperty (Property
.NewKeyword ("dc:identifier", link
));
275 indexable
.AddProperty (Property
.NewKeyword ("dc:source", channel_url
));
276 indexable
.AddProperty (Property
.New ("dc:publisher", channel_name
));
// Extract the src value of the first <img src="..."> occurrence in the item
// text: i ends up just past the opening quote, j is the closing quote.
// NOTE(review): text is null when the Item has no Text attribute; IndexOf
// would then throw NullReferenceException -- confirm Text is always present.
// NOTE(review): the checks for IndexOf returning -1 and the declaration of
// img are on lines not visible in this extract.
279 int i
= text
.IndexOf ("<img src=\"");
281 i
+= "<img src=\"".Length
;
282 int j
= text
.IndexOf ("\"", i
);
284 img
= text
.Substring (i
, j
-i
);
// Cached image path: <blam_dir>/Cache/<hash code of the image URL>.
// NOTE(review): String.GetHashCode is not guaranteed to be stable across
// runtime versions or platforms; presumably this mirrors how Blam names its
// cache files -- confirm.
288 string path
= Path
.Combine (Path
.Combine (blam_dir
, "Cache"),
289 img
.GetHashCode ().ToString ());
290 indexable
.AddProperty (Property
.NewUnsearched ("fixme:cachedimg", path
));
// The item text becomes the indexable's text content.
// NOTE(review): new StringReader (null) throws ArgumentNullException, so a
// missing Text attribute would fail here as well.
293 StringReader string_reader
= new StringReader (text
);
294 indexable
.SetTextReader (string_reader
);