beagled/LuceneCommon.cs

   1 //
   2 // LuceneCommon.cs
   3 //
   4 // Copyright (C) 2004-2005 Novell, Inc.
   5 //
   6
   7 //
   8 // Permission is hereby granted, free of charge, to any person obtaining a
   9 // copy of this software and associated documentation files (the "Software"),
  10 // to deal in the Software without restriction, including without limitation
  11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12 // and/or sell copies of the Software, and to permit persons to whom the
  13 // Software is furnished to do so, subject to the following conditions:
  14 //
  15 // The above copyright notice and this permission notice shall be included in
  16 // all copies or substantial portions of the Software.
  17 //
  18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24 // DEALINGS IN THE SOFTWARE.
  25 //
  26
  27 using System;
  28 using System.Collections;
  29 using System.Diagnostics;
  30 using System.Globalization;
  31 using System.IO;
  32 using System.Text;
  33 using System.Threading;
  34 using System.Xml;
  35 using System.Xml.Serialization;
  36
  37 using Lucene.Net.Analysis;
  38 using Lucene.Net.Analysis.Standard;
  39 using Lucene.Net.Documents;
  40 using Lucene.Net.Index;
  41 using Lucene.Net.QueryParsers;
  42 using LNS = Lucene.Net.Search;
  43
  44 using Beagle.Util;
  45
  46 namespace Beagle.Daemon {
  47
  48         public class LuceneCommon {
  49
  50                 public delegate bool HitFilter (Hit hit);
  51
  52                 // VERSION HISTORY
  53                 // ---------------
  54                 //
  55                 //  1: Original
  56                 //  2: Changed format of timestamp strings
  57                 //  3: Schema changed to be more Dashboard-Match-like
  58                 //  4: Schema changed for files to include _Directory property
  59                 //  5: Changed analyzer to support stemming.  Bumped version # to
  60                 //     force everyone to re-index.
  61                 //  6: lots of schema changes as part of the general refactoring
  62                 //  7: incremented to force a re-index after our upgrade to lucene 1.4
  63                 //     (in theory the file formats are compatible, we are seeing 'term
  64                 //     out of order' exceptions in some cases)
  65                 //  8: another forced re-index, this time because of massive changes
  66                 //     in the file system backend (it would be nice to have per-backend
  67                 //     versioning so that we didn't have to purge all indexes just
  68                 //     because one changed)
  69                 //  9: changed the way properties are stored, changed in conjunction
  70                 //     with sane handling of multiple properties on hits.
  71                 // 10: changed to support typed and mutable properties
  72                 // 11: moved mime type and hit type into properties
  73                 // 12: added year-month and year-month-day resolutions for all
  74                 //     date properties
  75                 // 13: moved source into a property
  76                 // 14: allow wildcard queries to also match keywords
  77                 // 15: analyze PropertyKeyword field, and store all properties as
  78                 //     lower case so that we're truly case insensitive.
  79                 // 16: add inverted timestamp to make querying substantially faster
  80                 // 17: add boolean property to denote a child indexable
  81                 private const int MAJOR_VERSION = 17;
  82                 private int minor_version = 0;
  83
  84                 private string index_name;
  85                 private string top_dir;
  86
  87                 private string fingerprint;
  88                 private int last_item_count = -1;
  89
  90                 // This is the big index, containing document full-texts and
  91                 // data that is expensive to index.
  92                 private Lucene.Net.Store.Directory primary_store = null;
  93
  94                 // This is the small index, containing document info that we
  95                 // expect to have change.  Canonical example: file names.
  96                 private Lucene.Net.Store.Directory secondary_store = null;
  97
  98                 //////////////////////////////////////////////////////////////////////////////
  99
 100                 protected LuceneCommon (string index_name, int minor_version)
 101                 {
 102                         this.index_name = index_name;
 103                         this.minor_version = minor_version;
 104
 105                         this.top_dir = (Path.IsPathRooted (index_name)) ? index_name : Path.Combine (PathFinder.IndexDir, index_name);
 106                 }
 107
 108                 //////////////////////////////////////////////////////////////////////////////
 109
 110                 protected string IndexName { get { return index_name; } }
 111
 112                 public Lucene.Net.Store.Directory PrimaryStore { get { return primary_store; } }
 113
 114                 public Lucene.Net.Store.Directory SecondaryStore { get { return secondary_store; } }
 115
 116                 public string Fingerprint { get { return fingerprint; } }
 117
 118                 public string TopDirectory { get { return top_dir; } }
 119
 120                 //////////////////////////////////////////////////////////////////////////////
 121
 122                 protected TextCache text_cache = null;
 123
 124                 public TextCache TextCache {
 125                         get { return text_cache; }
 126                         set { text_cache = value; }
 127                 }
 128
 129                 //////////////////////////////////////////////////////////////////////////////
 130
 131                 private string VersionFile {
 132                         get { return Path.Combine (top_dir, "version"); }
 133                 }
 134
 135                 private string FingerprintFile {
 136                         get { return Path.Combine (top_dir, "fingerprint"); }
 137                 }
 138
 139                 // Shouldn't really be public
 140                 public string PrimaryIndexDirectory {
 141                         get { return Path.Combine (top_dir, "PrimaryIndex"); }
 142                 }
 143
 144                 // Shouldn't really be public
 145                 public string SecondaryIndexDirectory {
 146                         get { return Path.Combine (top_dir, "SecondaryIndex"); }
 147                 }
 148
 149                 public string LockDirectory {
 150                         get { return Path.Combine (top_dir, "Locks"); }
 151                 }
 152
 153                 //////////////////////////////////////////////////////////////////////////////
 154
 155                 // Deal with dangling locks
 156
 157                 private bool IsDanglingLock (FileInfo info)
 158                 {
 159                         Log.Debug ("Checking for dangling locks...");
 160
 161                         // It isn't even a lock file
 162                         if (! info.Name.EndsWith (".lock"))
 163                                 return false;
 164
 165                         StreamReader reader;
 166                         string pid = null;
 167
 168                         try {
 169                                 reader = new StreamReader (info.FullName);
 170                                 pid = reader.ReadLine ();
 171                                 reader.Close ();
 172
 173                         } catch {
 174                                 // We couldn't read the lockfile, so it probably went away.
 175                                 return false;
 176                         }
 177
 178
 179                         if (pid == null) {
 180                                 // Looks like the lock file was empty, which really
 181                                 // shouldn't happen.  It should contain the PID of
 182                                 // the process which locked it.  Lets be on the safe
 183                                 // side and assume it's a dangling lock.
 184                                 Log.Warn ("Found an empty lock file, that shouldn't happen: {0}", info.FullName);
 185                                 return true;
 186                         }
 187
 188                         string cmdline_file;
 189                         cmdline_file = String.Format ("/proc/{0}/cmdline", pid);
 190
 191                         string cmdline = "";
 192                         try {
 193                                 reader = new StreamReader (cmdline_file);
 194                                 cmdline = reader.ReadLine ();
 195                                 reader.Close ();
 196                         } catch {
 197                                 // If we can't open that file, either:
 198                                 // (1) The process doesn't exist
 199                                 // (2) It does exist, but it doesn't belong to us.
 200                                 //     Thus it isn't an IndexHelper
 201                                 // In either case, the lock is dangling --- if it
 202                                 // still exists.
 203                                 return info.Exists;
 204                         }
 205
 206                         // The process exists, but isn't an IndexHelper.
 207                         // If the lock file is still there, it is dangling.
 208                         // FIXME: During one run of bludgeon I got a null reference
 209                         // exception here, so I added the cmdline == null check.
 210                         // Why exactly would that happen?  Is this logic correct
 211                         // in that (odd and presumably rare) case?
 212                         if (cmdline == null || cmdline.IndexOf ("IndexHelper.exe") == -1)
 213                                 return info.Exists;
 214
 215                         // If we reach this point, we know:
 216                         // (1) The process still exists
 217                         // (2) We own it
 218                         // (3) It is an IndexHelper process
 219                         // Thus it almost certainly isn't a dangling lock.
 220                         // The process might be wedged, but that is
 221                         // another issue...
 222                         return false;
 223                 }
 224
 225                 protected bool Exists ()
 226                 {
 227                         if (! (Directory.Exists (top_dir)
 228                                && File.Exists (VersionFile)
 229                                && File.Exists (FingerprintFile)
 230                                && Directory.Exists (PrimaryIndexDirectory)
 231                                && IndexReader.IndexExists (PrimaryIndexDirectory)
 232                                && Directory.Exists (SecondaryIndexDirectory)
 233                                && IndexReader.IndexExists (SecondaryIndexDirectory)
 234                                && Directory.Exists (LockDirectory)))
 235                                 return false;
 236
 237                         // Check the index's version number.  If it is wrong,
 238                         // declare the index non-existent.
 239
 240                         StreamReader version_reader;
 241                         string version_str;
 242                         version_reader = new StreamReader (VersionFile);
 243                         version_str = version_reader.ReadLine ();
 244                         version_reader.Close ();
 245
 246                         int current_major_version, current_minor_version;
 247                         int i = version_str.IndexOf ('.');
 248
 249                         if (i != -1) {
 250                                 current_major_version = Convert.ToInt32 (version_str.Substring (0, i));
 251                                 current_minor_version = Convert.ToInt32 (version_str.Substring (i+1));
 252                         } else {
 253                                 current_minor_version = Convert.ToInt32 (version_str);
 254                                 current_major_version = 0;
 255                         }
 256
 257                         if (current_major_version != MAJOR_VERSION
 258                             || (minor_version >= 0 && current_minor_version != minor_version)) {
 259                                 Logger.Log.Debug ("Version mismatch in {0}", index_name);
 260                                 Logger.Log.Debug ("Index has version {0}.{1}, expected {2}.{3}",
 261                                                   current_major_version, current_minor_version,
 262                                                   MAJOR_VERSION, minor_version);
 263                                 return false;
 264                         }
 265
 266                         // Check the lock directory: If there is a dangling write lock,
 267                         // assume that the index is corrupted and declare it non-existent.
 268                         DirectoryInfo lock_dir_info;
 269                         lock_dir_info = new DirectoryInfo (LockDirectory);
 270                         foreach (FileInfo info in lock_dir_info.GetFiles ()) {
 271                                 if (IsDanglingLock (info)) {
 272                                         Logger.Log.Warn ("Found a dangling index lock on {0}", info.FullName);
 273                                         return false;
 274                                 }
 275                         }
 276
 277                         return true;
 278                 }
 279
 280                 private Lucene.Net.Store.Directory CreateIndex (string path)
 281                 {
 282                         // Create a directory to put the index in.
 283                         Directory.CreateDirectory (path);
 284
 285                         // Create a new store.
 286                         Lucene.Net.Store.Directory store;
 287                         store = Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);
 288
 289                         // Create an empty index in that store.
 290                         IndexWriter writer;
 291                         writer = new IndexWriter (store, null, true);
 292                         writer.Close ();
 293
 294                         return store;
 295                 }
 296
 297                 // Create will kill your index dead.  Use it with care.
 298                 // You don't need to call Open after calling Create.
 299                 protected void Create ()
 300                 {
 301                         if (minor_version < 0)
 302                                 minor_version = 0;
 303
 304                         // Purge any existing directories.
 305                         if (Directory.Exists (top_dir)) {
 306                                 Logger.Log.Debug ("Purging {0}", top_dir);
 307                                 Directory.Delete (top_dir, true);
 308                         }
 309
 310                         // Create any necessary directories.
 311                         Directory.CreateDirectory (top_dir);
 312                         Directory.CreateDirectory (LockDirectory);
 313
 314                         // Create the indexes.
 315                         primary_store = CreateIndex (PrimaryIndexDirectory);
 316                         secondary_store = CreateIndex (SecondaryIndexDirectory);
 317
 318                         // Generate and store the index fingerprint.
 319                         fingerprint = GuidFu.ToShortString (Guid.NewGuid ());
 320                         TextWriter writer;
 321                         writer = new StreamWriter (FingerprintFile, false);
 322                         writer.WriteLine (fingerprint);
 323                         writer.Close ();
 324
 325                         // Store our index version information.
 326                         writer = new StreamWriter (VersionFile, false);
 327                         writer.WriteLine ("{0}.{1}", MAJOR_VERSION, minor_version);
 328                         writer.Close ();
 329                 }
 330
 331                 protected void Open ()
 332                 {
 333                         Open (false);
 334                 }
 335
 336                 protected void Open (bool read_only_mode)
 337                 {
 338                         // Read our index fingerprint.
 339                         TextReader reader;
 340                         reader = new StreamReader (FingerprintFile);
 341                         fingerprint = reader.ReadLine ();
 342                         reader.Close ();
 343
 344                         // Create stores for our indexes.
 345                         primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
 346                         secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
 347                 }
 348
 349                 ////////////////////////////////////////////////////////////////
 350
 351                 //
 352                 // Custom Analyzers
 353                 //
 354
 355                 private class SingletonTokenStream : TokenStream {
 356
 357                         private string singleton_str;
 358
 359                         public SingletonTokenStream (string singleton_str)
 360                         {
 361                                 this.singleton_str = singleton_str;
 362                         }
 363
 364                         override public Lucene.Net.Analysis.Token Next ()
 365                         {
 366                                 if (singleton_str == null)
 367                                         return null;
 368
 369                                 Lucene.Net.Analysis.Token token;
 370                                 token = new Lucene.Net.Analysis.Token (singleton_str, 0, singleton_str.Length);
 371
 372                                 singleton_str = null;
 373
 374                                 return token;
 375                         }
 376                 }
 377
 378                 // FIXME: This assumes everything being indexed is in English!
 379                 internal class BeagleAnalyzer : StandardAnalyzer {
 380
 381                         private char [] buffer = new char [2];
 382                         private bool strip_extra_property_info = false;
 383                         private bool tokenize_email_hostname = false;
 384
 385                         public BeagleAnalyzer (bool is_indexing_analyzer)
 386                         {
 387                                 if (is_indexing_analyzer) {
 388                                         this.strip_extra_property_info = true;
 389                                         this.tokenize_email_hostname = true;
 390                                 } else {
 391                                         this.strip_extra_property_info = false;
 392                                         this.tokenize_email_hostname = false;
 393                                 }
 394                         }
 395
 396                         public override TokenStream TokenStream (string fieldName, TextReader reader)
 397                         {
 398                                 bool is_text_prop = false;
 399
 400                                 // Strip off the first two characters in a property.
 401                                 // We store type information in those two characters, so we don't
 402                                 // want to index them.
 403                                 if (fieldName.StartsWith ("prop:")) {
 404
 405                                         if (strip_extra_property_info) {
 406                                                 // Skip everything up to and including the first :
 407                                                 int c;
 408                                                 do {
 409                                                         c = reader.Read ();
 410                                                 } while (c != -1 && c != ':');
 411                                         }
 412
 413                                         is_text_prop = fieldName.StartsWith ("prop:t");
 414
 415                                         // If this is non-text property, just return one token
 416                                         // containing the entire string.  We do this to avoid
 417                                         // tokenizing keywords.
 418                                         if (! is_text_prop) {
 419                                                 // We don't want to lower case the token if it's
 420                                                 // not in the private namespace.
 421
 422                                                 TokenStream singleton_stream = new SingletonTokenStream (reader.ReadToEnd ());
 423
 424                                                 if (fieldName.StartsWith ("prop:k:" + Property.PrivateNamespace))
 425                                                         return singleton_stream;
 426                                                 else
 427                                                         return new LowerCaseFilter (singleton_stream);
 428                                         }
 429                                 } else if (fieldName == "PropertyKeyword")
 430                                         return new LowerCaseFilter (new SingletonTokenStream (reader.ReadToEnd ()));
 431
 432                                 TokenStream outstream;
 433                                 outstream = base.TokenStream (fieldName, reader);
 434
 435                                 if (fieldName == "Text"
 436                                     || fieldName == "HotText"
 437                                     || fieldName == "PropertyText"
 438                                     || is_text_prop) {
 439                                         outstream = new NoiseEmailHostFilter (outstream, tokenize_email_hostname);
 440                                         outstream = new PorterStemFilter (outstream);
 441                                 }
 442
 443                                 return outstream;
 444                         }
 445                 }
 446
 447                 static private Analyzer indexing_analyzer = new BeagleAnalyzer (true);
 448                 static private Analyzer query_analyzer = new BeagleAnalyzer (false);
 449
 450                 static protected Analyzer IndexingAnalyzer { get { return indexing_analyzer; } }
 451                 static protected Analyzer QueryAnalyzer { get { return query_analyzer; } }
 452
 453                 ////////////////////////////////////////////////////////////////
 454
 455                 //
 456                 // Dealing with properties
 457                 //
 458
 459                 static private char TypeToCode (PropertyType type)
 460                 {
 461                         switch (type) {
 462                         case PropertyType.Text:    return 't';
 463                         case PropertyType.Keyword: return 'k';
 464                         case PropertyType.Date:    return 'd';
 465                         }
 466                         throw new Exception ("Bad property type: " + type);
 467                 }
 468
 469                 static private PropertyType CodeToType (char c)
 470                 {
 471                         switch (c) {
 472                         case 't': return PropertyType.Text;
 473                         case 'k': return PropertyType.Keyword;
 474                         case 'd': return PropertyType.Date;
 475                         }
 476
 477                         throw new Exception ("Bad property code: " + c);
 478                 }
 479
 480                 static private string TypeToWildcardField (PropertyType type)
 481                 {
 482                         switch (type) {
 483                         case PropertyType.Text:    return "PropertyText";
 484                         case PropertyType.Keyword: return "PropertyKeyword";
 485                         case PropertyType.Date:    return "PropertyDate";
 486                         }
 487
 488                         throw new Exception ("Bad property type: " + type);
 489                 }
 490
 491                 static private Field.Index TypeToIndexInstruction (PropertyType type)
 492                 {
 493                         switch (type) {
 494                         case PropertyType.Text:    return Field.Index.TOKENIZED; // Full analysis
 495                         case PropertyType.Keyword: return Field.Index.TOKENIZED; // Lowercases keywords
 496                         case PropertyType.Date:    return Field.Index.NO_NORMS;  // Do nothing
 497                         }
 498
 499                         throw new Exception ("Bad property type: " + type);
 500                 }
 501
 502                 // Exposing this is a little bit suspicious.
 503                 static protected string PropertyToFieldName (PropertyType type, string key)
 504                 {
 505                         return String.Format ("prop:{0}:{1}", TypeToCode (type), key);
 506
 507                 }
 508
 509                 static private void AddDateFields (string field_name, Property prop, Document doc)
 510                 {
 511                         DateTime dt = StringFu.StringToDateTime (prop.Value);
 512
 513                         Field f;
 514                         f = new Field ("YM:" + field_name,
 515                                        StringFu.DateTimeToYearMonthString (dt),
 516                                        Field.Store.NO,
 517                                        Field.Index.NO_NORMS);
 518                         doc.Add (f);
 519
 520                         f = new Field ("D:" + field_name,
 521                                        StringFu.DateTimeToDayString (dt),
 522                                        Field.Store.NO,
 523                                        Field.Index.NO_NORMS);
 524                         doc.Add (f);
 525                 }
 526
 527                 static protected void AddPropertyToDocument (Property prop, Document doc)
 528                 {
 529                         if (prop == null || prop.Value == null || prop.Value == String.Empty)
 530                                 return;
 531
 532                         // Don't actually put properties in the UnindexedNamespace
 533                         // in the document.  A horrible (and yet lovely!) hack.
 534                         if (prop.Key.StartsWith (StringFu.UnindexedNamespace))
 535                                 return;
 536
 537                         Field f;
 538
 539                         if (prop.IsSearched) {
 540                                 string wildcard_field = TypeToWildcardField (prop.Type);
 541
 542                                 f = new Field (wildcard_field,
 543                                                prop.Value,
 544                                                Field.Store.NO,
 545                                                TypeToIndexInstruction (prop.Type));
 546
 547                                 // We don't want to include norms for non-text
 548                                 // fields, even if we do tokenize them.
 549                                 if (prop.Type == PropertyType.Keyword || prop.Type == PropertyType.Date)
 550                                         f.SetOmitNorms (true);
 551
 552                                 doc.Add (f);
 553
 554                                 if (prop.Type == PropertyType.Date)
 555                                         AddDateFields (wildcard_field, prop, doc);
 556                         }
 557
 558                         string coded_value;
 559                         coded_value = String.Format ("{0}:{1}",
 560                                                      prop.IsSearched ? 's' : '_',
 561                                                      prop.Value);
 562
 563                         string field_name = PropertyToFieldName (prop.Type, prop.Key);
 564
 565                         f = new Field (field_name,
 566                                        coded_value,
 567                                        prop.IsStored ? Field.Store.YES : Field.Store.NO,
 568                                        Field.Index.TOKENIZED);
 569                         doc.Add (f);
 570
 571                         if (prop.Type == PropertyType.Date)
 572                                 AddDateFields (field_name, prop, doc);
 573                 }
 574
 575                 static protected Property GetPropertyFromDocument (Field f, Document doc, bool from_primary_index)
 576                 {
 577                         // Note: we don't use the document that we pass in,
 578                         // but in theory we could.  At some later point we
 579                         // might need to split a property's data across two or
 580                         // more fields in the document.
 581
 582                         if (f == null)
 583                                 return null;
 584
 585                         string field_name;
 586                         field_name = f.Name ();
 587                         if (field_name.Length < 7
 588                             || ! field_name.StartsWith ("prop:"))
 589                                 return null;
 590
 591                         string field_value;
 592                         field_value = f.StringValue ();
 593
 594                         Property prop;
 595                         prop = new Property ();
 596                         prop.Type = CodeToType (field_name [5]);
 597                         prop.Key = field_name.Substring (7);
 598                         prop.Value = field_value.Substring (2);
 599                         prop.IsSearched = (field_value [0] == 's');
 600                         prop.IsMutable = ! from_primary_index;
 601                         prop.IsStored = f.IsStored ();
 602
 603                         return prop;
 604                 }
 605
 606                 //////////////////////////////////////////////////////////////////////////////
 607
 608                 //
 609                 // Dealing with documents
 610                 //
 611
 612                 static protected void BuildDocuments (Indexable indexable,
 613                                                       out Document primary_doc,
 614                                                       out Document secondary_doc)
 615                 {
 616                         primary_doc = new Document ();
 617                         secondary_doc = null;
 618
 619                         Field f;
 620
 621                         f = new Field ("Uri", UriFu.UriToEscapedString (indexable.Uri),
 622                                        Field.Store.YES, Field.Index.NO_NORMS);
 623                         primary_doc.Add (f);
 624
 625                         if (indexable.ParentUri != null) {
 626                                 f = new Field ("ParentUri", UriFu.UriToEscapedString (indexable.ParentUri),
 627                                                Field.Store.YES, Field.Index.NO_NORMS);
 628                                 primary_doc.Add (f);
 629                         }
 630
 631                         if (indexable.ValidTimestamp) {
 632                                 // Note that we also want to search in the
 633                                 // Timestamp field when we do a wildcard date
 634                                 // query, so that's why we also add a wildcard
 635                                 // field for each item here.
 636
 637                                 string wildcard_field = TypeToWildcardField (PropertyType.Date);
 638
 639                                 string str = StringFu.DateTimeToString (indexable.Timestamp);
 640                                 f = new Field ("Timestamp", str, Field.Store.YES, Field.Index.NO_NORMS);
 641                                 primary_doc.Add (f);
 642                                 f = new Field (wildcard_field, str, Field.Store.NO, Field.Index.NO_NORMS);
 643                                 primary_doc.Add (f);
 644
 645                                 // Create an inverted timestamp so that we can
 646                                 // sort by timestamp at search-time.
 647                                 long timeval = Convert.ToInt64 (str);
 648                                 f = new Field ("InvertedTimestamp", (Int64.MaxValue - timeval).ToString (),
 649                                                Field.Store.NO, Field.Index.NO_NORMS);
 650                                 primary_doc.Add (f);
 651
 652                                 str = StringFu.DateTimeToYearMonthString (indexable.Timestamp);
 653                                 f = new Field ("YM:Timestamp", str, Field.Store.YES, Field.Index.NO_NORMS);
 654                                 primary_doc.Add (f);
 655                                 f = new Field ("YM:" + wildcard_field, str,
 656                                                Field.Store.NO, Field.Index.NO_NORMS);
 657                                 primary_doc.Add (f);
 658
 659                                 str = StringFu.DateTimeToDayString (indexable.Timestamp);
 660                                 f = new Field ("D:Timestamp", str, Field.Store.YES, Field.Index.NO_NORMS);
 661                                 primary_doc.Add (f);
 662                                 f = new Field ("D:" + wildcard_field, str,
 663                                                Field.Store.NO, Field.Index.NO_NORMS);
 664                                 primary_doc.Add (f);
 665                         }
 666
 667                         if (indexable.NoContent) {
 668                                 // If there is no content, make a note of that
 669                                 // in a special property.
 670                                 Property prop;
 671                                 prop = Property.NewBool ("beagle:NoContent", true);
 672                                 AddPropertyToDocument (prop, primary_doc);
 673
 674                         } else {
 675
 676                                 // Since we might have content, add our text
 677                                 // readers.
 678
 679                                 TextReader reader;
 680
 681                                 reader = indexable.GetTextReader ();
 682                                 if (reader != null) {
 683                                         f = new Field ("Text", reader);
 684                                         primary_doc.Add (f);
 685                                 }
 686
 687                                 reader = indexable.GetHotTextReader ();
 688                                 if (reader != null) {
 689                                         f = new Field ("HotText", reader);
 690                                         primary_doc.Add (f);
 691                                 }
 692                         }
 693
 694                         // Store the Type and MimeType in special properties
 695
 696                         if (indexable.HitType != null) {
 697                                 Property prop;
 698                                 prop = Property.NewUnsearched ("beagle:HitType", indexable.HitType);
 699                                 AddPropertyToDocument (prop, primary_doc);
 700                         }
 701
 702                         if (indexable.MimeType != null) {
 703                                 Property prop;
 704                                 prop = Property.NewUnsearched ("beagle:MimeType", indexable.MimeType);
 705                                 AddPropertyToDocument (prop, primary_doc);
 706                         }
 707
 708                         if (indexable.Source != null) {
 709                                 Property prop;
 710                                 prop = Property.NewUnsearched ("beagle:Source", indexable.Source);
 711                                 AddPropertyToDocument (prop, primary_doc);
 712                         }
 713
 714                         {
 715                                 Property prop;
 716                                 prop = Property.NewBool (Property.IsChildPropKey, indexable.IsChild);
 717                                 AddPropertyToDocument (prop, primary_doc);
 718                         }
 719
 720                         // Store the other properties
 721
 722                         foreach (Property prop in indexable.Properties) {
 723                                 Document target_doc = primary_doc;
 724                                 if (prop.IsMutable) {
 725                                         if (secondary_doc == null)
 726                                                 secondary_doc = CreateSecondaryDocument (indexable.Uri, indexable.ParentUri);
 727
 728                                         target_doc = secondary_doc;
 729                                 }
 730
 731                                 AddPropertyToDocument (prop, target_doc);
 732                         }
 733                 }
 734
 735                 static private Document CreateSecondaryDocument (Uri uri, Uri parent_uri)
 736                 {
 737                         Document secondary_doc = new Document ();
 738
 739                         Field f = new Field ("Uri", UriFu.UriToEscapedString (uri), Field.Store.YES, Field.Index.NO_NORMS);
 740                         secondary_doc.Add (f);
 741
 742                         if (parent_uri != null) {
 743                                 // Store both Uri and ParentUri in secondary index for easy removal
 744                                 f = new Field ("ParentUri", UriFu.UriToEscapedString (parent_uri), Field.Store.YES, Field.Index.NO_NORMS);
 745                                 secondary_doc.Add (f);
 746                         }
 747
 748                         return secondary_doc;
 749                 }
 750
 751                 static protected Document RewriteDocument (Document old_secondary_doc,
 752                                                            Indexable prop_only_indexable)
 753                 {
 754                         Hashtable seen_props;
 755                         seen_props = new Hashtable ();
 756
 757                         Document new_doc;
 758                         new_doc = new Document ();
 759
 760                         Field uri_f;
 761                         uri_f = new Field ("Uri", UriFu.UriToEscapedString (prop_only_indexable.Uri), Field.Store.YES, Field.Index.NO_NORMS);
 762                         new_doc.Add (uri_f);
 763
 764                         Logger.Log.Debug ("Rewriting {0}", prop_only_indexable.DisplayUri);
 765
 766                         if (prop_only_indexable.ParentUri != null) {
 767                                 uri_f = new Field ("ParentUri", UriFu.UriToEscapedString (prop_only_indexable.ParentUri), Field.Store.YES, Field.Index.NO_NORMS);
 768                                 new_doc.Add (uri_f);
 769                                 Logger.Log.Debug ("Parent Uri {0}", prop_only_indexable.ParentUri);
 770                         }
 771
 772                         // Add the new properties to the new document.  To
 773                         // delete a property, set the Value to null... then it
 774                         // will be added to seen_props (so the old value will
 775                         // be ignored below), but AddPropertyToDocument will
 776                         // return w/o doing anything.
 777                         foreach (Property prop in prop_only_indexable.Properties) {
 778                                 seen_props [prop.Key] = prop;
 779
 780                                 // Don't add properties that are empty; they
 781                                 // essentially mean "reset this property"
 782                                 if (prop.Value == String.Empty) {
 783                                         Logger.Log.Debug ("Resetting prop '{0}'", prop.Key);
 784                                         continue;
 785                                 }
 786
 787                                 AddPropertyToDocument (prop, new_doc);
 788                                 Logger.Log.Debug ("New prop '{0}' = '{1}'", prop.Key, prop.Value);
 789                         }
 790
 791                         // Copy the other properties from the old document to the
 792                         // new one, skipping any properties that we got new values
 793                         // for out of the Indexable.
 794                         if (old_secondary_doc != null) {
 795                                 foreach (Field f in old_secondary_doc.Fields ()) {
 796                                         Property prop;
 797                                         prop = GetPropertyFromDocument (f, old_secondary_doc, false);
 798                                         if (prop != null && ! seen_props.Contains (prop.Key)) {
 799                                                 Logger.Log.Debug ("Old prop '{0}' = '{1}'", prop.Key, prop.Value);
 800                                                 AddPropertyToDocument (prop, new_doc);
 801                                         }
 802                                 }
 803                         }
 804
 805                         return new_doc;
 806                 }
 807
 808                 static protected Uri GetUriFromDocument (Document doc)
 809                 {
 810                         string uri;
 811                         uri = doc.Get ("Uri");
 812                         if (uri == null)
 813                                 throw new Exception ("Got document from Lucene w/o a URI!");
 814                         return UriFu.EscapedStringToUri (uri);
 815                 }
 816
 817                 static protected Hit DocumentToHit (Document doc)
 818                 {
 819                         Hit hit;
 820                         hit = new Hit ();
 821
 822                         hit.Uri = GetUriFromDocument (doc);
 823
 824                         string str;
 825                         str = doc.Get ("ParentUri");
 826                         if (str != null)
 827                                 hit.ParentUri = UriFu.EscapedStringToUri (str);
 828
 829                         hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));
 830
 831                         AddPropertiesToHit (hit, doc, true);
 832
 833                         // Get the Type and MimeType from the properties.
 834                         hit.Type = hit.GetFirstProperty ("beagle:HitType");
 835                         hit.MimeType = hit.GetFirstProperty ("beagle:MimeType");
 836                         hit.Source = hit.GetFirstProperty ("beagle:Source");
 837
 838                         return hit;
 839                 }
 840
 841                 static protected void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
 842                 {
 843                         foreach (Field f in doc.Fields ()) {
 844                                 Property prop;
 845                                 prop = GetPropertyFromDocument (f, doc, from_primary_index);
 846                                 if (prop != null)
 847                                         hit.AddProperty (prop);
 848                         }
 849                 }
 850
 851
 852                 //////////////////////////////////////////////////////////////////////////////
 853
 854                 //
 855                 // Handle the index's item count
 856                 //
 857
 858                 public int GetItemCount ()
 859                 {
 860                         if (last_item_count < 0) {
 861                                 IndexReader reader;
 862                                 reader = GetReader (PrimaryStore);
 863                                 last_item_count = reader.NumDocs ();
 864                                 ReleaseReader (reader);
 865                         }
 866                         return last_item_count;
 867                 }
 868
 869                 // We should set the cached count of index items when IndexReaders
 870                 // are open and available, so calls to GetItemCount will return immediately.
 871
 872                 protected bool HaveItemCount { get { return last_item_count >= 0; } }
 873
 874                 protected void SetItemCount (IndexReader reader)
 875                 {
 876                         last_item_count = reader.NumDocs ();
 877                 }
 878
 879                 public void SetItemCount (int count)
 880                 {
 881                         last_item_count = count;
 882                 }
 883
 884                 protected void AdjustItemCount (int delta)
 885                 {
 886                         if (last_item_count >= 0)
 887                                 last_item_count += delta;
 888                 }
 889
 890                 //////////////////////////////////////////////////////////////////////////////
 891
 892                 //
 893                 // Access to the stemmer and list of stop words
 894                 //
 895
 896                 static PorterStemmer stemmer = new PorterStemmer ();
 897
 898                 static public string Stem (string str)
 899                 {
 900                         return stemmer.Stem (str);
 901                 }
 902
 903                 public static bool IsStopWord (string stemmed_word)
 904                 {
 905                         return ArrayFu.IndexOfString (StopAnalyzer.ENGLISH_STOP_WORDS, stemmed_word) != -1;
 906                 }
 907
 908                 //////////////////////////////////////////////////////////////////////////////
 909
 910                 //
 911                 // Special Hit Filtering classes
 912                 //
 913
 914                 static private bool TrueHitFilter (Hit hit)
 915                 {
 916                         return true;
 917                 }
 918
 919                 static private HitFilter true_hit_filter = new HitFilter (TrueHitFilter);
 920
 921                 public class OrHitFilter {
 922
 923                         private ArrayList all = new ArrayList ();
 924                         private bool contains_known_true = false;
 925
 926                         public void Add (HitFilter hit_filter)
 927                         {
 928                                 if (hit_filter == true_hit_filter)
 929                                         contains_known_true = true;
 930                                 all.Add (hit_filter);
 931                         }
 932
 933                         public bool HitFilter (Hit hit)
 934                         {
 935                                 if (contains_known_true)
 936                                         return true;
 937                                 foreach (HitFilter hit_filter in all)
 938                                         if (hit_filter (hit))
 939                                                 return true;
 940                                 return false;
 941                         }
 942                 }
 943
 944                 public class AndHitFilter {
 945
 946                         private ArrayList all = new ArrayList ();
 947
 948                         public void Add (HitFilter hit_filter)
 949                         {
 950                                 all.Add (hit_filter);
 951                         }
 952
 953                         public bool HitFilter (Hit hit)
 954                         {
 955                                 foreach (HitFilter hit_filter in all)
 956                                         if (! hit_filter (hit))
 957                                                 return false;
 958                                 return true;
 959                         }
 960                 }
 961
 962                 public class NotHitFilter {
 963                         HitFilter original;
 964
 965                         public NotHitFilter (HitFilter original)
 966                         {
 967                                 this.original = original;
 968                         }
 969
 970                         public bool HitFilter (Hit hit)
 971                         {
 972                                 return ! original (hit);
 973                         }
 974                 }
 975
 976                 //////////////////////////////////////////////////////////////////////////////
 977
 978                 //
 979                 // Queries
 980                 //
 981
 982                 static private LNS.Query StringToQuery (string field_name,
 983                                                         string text,
 984                                                         ArrayList term_list)
 985                 {
 986                         ArrayList tokens = new ArrayList ();
 987
 988                         // Use the analyzer to extract the query's tokens.
 989                         // This code is taken from Lucene's query parser.
 990                         TokenStream source = QueryAnalyzer.TokenStream (field_name, new StringReader (text));
 991                         while (true) {
 992                                 Lucene.Net.Analysis.Token token;
 993                                 try {
 994                                         token = source.Next ();
 995                                         if (token == null)
 996                                                 break;
 997                                 } catch (IOException) {
 998                                         break;
 999                                 }
1000                                 if (token != null)
1001                                         tokens.Add (token.TermText ());
1002                         }
1003                         try {
1004                                 source.Close ();
1005                         } catch (IOException) {
1006                                 // ignore
1007                         }
1008
1009                         if (tokens.Count == 0)
1010                                 return null;
1011
1012                         LNS.PhraseQuery query = new LNS.PhraseQuery ();
1013
1014                         foreach (string token in tokens) {
1015                                 Term term;
1016                                 term = new Term (field_name, token);
1017                                 query.Add (term);
1018                                 if (term_list != null)
1019                                         term_list.Add (term);
1020                         }
1021
1022                         return query;
1023                 }
1024
1025                 //
1026                 // Date Range Handling
1027                 //
1028
1029                 // This function will break down dates to discrete chunks of
1030                 // time to avoid expanding RangeQuerys as much as possible.
1031                 // For example, searching for
1032                 //
1033                 // YMD(5 May 2005, 16 Oct 2006)
1034                 //
1035                 // would break down into three queries:
1036                 //
1037                 // (YM(May 2005) AND D(5,31)) OR
1038                 // YM(Jun 2005, Sep 2006) OR
1039                 // (YM(Oct 2006) AND D(1,16))
1040
1041                 static private DateTime lower_bound = DateTimeUtil.UnixToDateTimeUtc (0);
1042
1043                 // FIXME: we should probably boost this sometime around 2030.
1044                 // Mark your calendar.
1045                 static private DateTime upper_bound = new DateTime (2038, 12, 31);
1046
1047                 static private Term NewYearMonthTerm (string field_name, int y, int m)
1048                 {
1049                         return new Term ("YM:" + field_name, String.Format ("{0}{1:00}", y, m));
1050                 }
1051
1052                 static private LNS.Query NewYearMonthQuery (string field_name, int y, int m)
1053                 {
1054                         return new LNS.TermQuery (NewYearMonthTerm (field_name, y, m));
1055                 }
1056
1057                 static private LNS.Query NewYearMonthQuery (string field_name, int y1, int m1, int y2, int m2)
1058                 {
1059                         return new LNS.RangeQuery (NewYearMonthTerm (field_name, y1, m1),
1060                                                    NewYearMonthTerm (field_name, y2, m2),
1061                                                    true); // query is inclusive
1062                 }
1063
1064                 static private Term NewDayTerm (string field_name, int d)
1065                 {
1066                         return new Term ("D:" + field_name, String.Format ("{0:00}", d));
1067                 }
1068
1069                 static private LNS.Query NewDayQuery (string field_name, int d1, int d2)
1070                 {
1071                         return new LNS.RangeQuery (NewDayTerm (field_name, d1),
1072                                                    NewDayTerm (field_name, d2),
1073                                                    true); // query is inclusive
1074                 }
1075
1076                 private class DateRangeHitFilter {
1077                         public string Key;
1078                         public DateTime StartDate;
1079                         public DateTime EndDate;
1080
1081                         public bool HitFilter (Hit hit)
1082                         {
1083                                 // First, check the Timestamp
1084                                 if (Key == QueryPart_DateRange.AllPropertiesKey
1085                                     || Key == QueryPart_DateRange.TimestampKey) {
1086                                         DateTime dt;
1087                                         dt = hit.Timestamp;
1088                                         if (StartDate <= dt && dt <= EndDate)
1089                                                 return true;
1090                                         if (Key == QueryPart_DateRange.TimestampKey)
1091                                                 return false;
1092                                 }
1093
1094                                 if (Key == QueryPart_DateRange.AllPropertiesKey) {
1095                                         // Walk through all of the properties, and see if any
1096                                         // date properties fall inside the range.
1097                                         foreach (Property prop in hit.Properties) {
1098                                                 if (prop.Type == PropertyType.Date) {
1099                                                         DateTime dt;
1100                                                         dt = StringFu.StringToDateTime (prop.Value);
1101                                                         if (StartDate <= dt && dt <= EndDate)
1102                                                                 return true;
1103                                                 }
1104                                         }
1105                                         return false;
1106                                 } else {
1107                                         // Walk through all of the properties with the given key,
1108                                         // and see if any of them fall inside of the range.
1109                                         string[] values;
1110                                         values = hit.GetProperties (Key);
1111                                         foreach (string v in values) {
1112                                                 DateTime dt;
1113                                                 dt = StringFu.StringToDateTime (v);
1114                                                 if (StartDate <= dt && dt <= EndDate)
1115                                                         return true;
1116                                         }
1117                                         return false;
1118                                 }
1119                         }
1120                 }
1121
1122                 static private LNS.Query GetDateRangeQuery (QueryPart_DateRange part, out HitFilter hit_filter)
1123                 {
1124                         string field_name;
1125                         if (part.Key == QueryPart_DateRange.AllPropertiesKey)
1126                                 field_name = TypeToWildcardField (PropertyType.Date);
1127                         else if (part.Key == QueryPart_DateRange.TimestampKey)
1128                                 field_name = "Timestamp";
1129                         else
1130                                 field_name = PropertyToFieldName (PropertyType.Date, part.Key);
1131
1132                         // FIXME: We could optimize this and reduce the size of our range
1133                         // queries if we actually new the min and max date that appear in
1134                         // any properties in the index.  We would need to inspect the index to
1135                         // determine that at start-up, and then track it as new documents
1136                         // get added to the index.
1137                         if (part.StartDate < lower_bound)
1138                                 part.StartDate = lower_bound;
1139                         if (part.EndDate > upper_bound || part.EndDate == DateTime.MinValue)
1140                                 part.EndDate = upper_bound;
1141
1142                         // Swap the start and end dates if they come in reversed.
1143                         if (part.StartDate > part.EndDate) {
1144                                 DateTime swap;
1145                                 swap = part.StartDate;
1146                                 part.StartDate = part.EndDate;
1147                                 part.EndDate = swap;
1148                         }
1149
1150                         // Set up our hit filter to cull out the bad dates.
1151                         DateRangeHitFilter drhf;
1152                         drhf = new DateRangeHitFilter ();
1153                         drhf.Key = part.Key;
1154                         drhf.StartDate = part.StartDate;
1155                         drhf.EndDate = part.EndDate;
1156                         hit_filter = new HitFilter (drhf.HitFilter);
1157
1158                         Logger.Log.Debug ("Building new date range query");
1159                         Logger.Log.Debug ("Start: {0}", part.StartDate);
1160                         Logger.Log.Debug ("End: {0}", part.EndDate);
1161
1162                         int y1, m1, d1, y2, m2, d2;
1163                         y1 = part.StartDate.Year;
1164                         m1 = part.StartDate.Month;
1165                         d1 = part.StartDate.Day;
1166                         y2 = part.EndDate.Year;
1167                         m2 = part.EndDate.Month;
1168                         d2 = part.EndDate.Day;
1169
1170                         LNS.BooleanQuery top_level_query;
1171                         top_level_query = new LNS.BooleanQuery ();
1172
1173                         // A special case: both the start and the end of our range fall
1174                         // in the same month.
1175                         if (y1 == y2 && m1 == m2) {
1176                                 LNS.Query ym_query;
1177                                 ym_query = NewYearMonthQuery (field_name, y1, m1);
1178
1179                                 // If our range only covers a part of the month, do a range query on the days.
1180                                 if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
1181                                         LNS.BooleanQuery sub_query;
1182                                         sub_query = new LNS.BooleanQuery ();
1183                                         sub_query.Add (ym_query, true, false);
1184                                         sub_query.Add (NewDayQuery (field_name, d1, d2), true, false);
1185                                         top_level_query.Add (sub_query, false, false);
1186                                 } else {
1187                                         top_level_query.Add (ym_query, false, false);
1188                                 }
1189
1190                         } else {
1191
1192                                 // Handle a partial month at the beginning of our range.
1193                                 if (d1 > 1) {
1194                                         LNS.BooleanQuery sub_query;
1195                                         sub_query = new LNS.BooleanQuery ();
1196                                         sub_query.Add (NewYearMonthQuery (field_name, y1, m1), true, false);
1197                                         sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), true, false);
1198                                         top_level_query.Add (sub_query, false, false);
1199
1200                                         ++m1;
1201                                         if (m1 == 13) {
1202                                                 m1 = 1;
1203                                                 ++y1;
1204                                         }
1205                                 }
1206
1207                                 // And likewise, handle a partial month at the end of our range.
1208                                 if (d2 < DateTime.DaysInMonth (y2, m2)) {
1209                                         LNS.BooleanQuery sub_query;
1210                                         sub_query = new LNS.BooleanQuery ();
1211                                         sub_query.Add (NewYearMonthQuery (field_name, y2, m2), true, false);
1212                                         sub_query.Add (NewDayQuery (field_name, 1, d2), true, false);
1213                                         top_level_query.Add (sub_query, false, false);
1214
1215                                         --m2;
1216                                         if (m2 == 0) {
1217                                                 m2 = 12;
1218                                                 --y2;
1219                                         }
1220                                 }
1221
1222                                 // Generate the query for the "middle" of our period, if it is non-empty
1223                                 if (y1 < y2 || ((y1 == y2) && m1 <= m2))
1224                                         top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
1225                                                              false, false);
1226                         }
1227
1228                         return top_level_query;
1229                 }
1230
1231                 // search_subset_uris is a list of Uris that this search should be
1232                 // limited to.
1233                 static protected void QueryPartToQuery (QueryPart     abstract_part,
1234                                                         bool          only_build_primary_query,
1235                                                         ArrayList     term_list,
1236                                                         out LNS.Query primary_query,
1237                                                         out LNS.Query secondary_query,
1238                                                         out HitFilter hit_filter)
1239                 {
1240                         primary_query = null;
1241                         secondary_query = null;
1242
1243                         // By default, we assume that our lucene queries will return exactly the
1244                         // matching set of objects.  We need to set the hit filter if further
1245                         // refinement of the search results is required.  (As in the case of
1246                         // date range queries, for example.)  We essentially have to do this
1247                         // to make OR queries work correctly.
1248                         hit_filter = true_hit_filter;
1249
1250                         // The exception is when dealing with a prohibited part.  Just return
1251                         // null for the hit filter in that case.  This works since
1252                         // prohibited parts are not allowed inside of OR queries.
1253                         if (abstract_part.Logic == QueryPartLogic.Prohibited)
1254                                 hit_filter = null;
1255
1256                         if (abstract_part == null)
1257                                 return;
1258
1259                         if (abstract_part is QueryPart_Text) {
1260                                 QueryPart_Text part = (QueryPart_Text) abstract_part;
1261
1262                                 if (! (part.SearchFullText || part.SearchTextProperties))
1263                                         return;
1264
1265                                 LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
1266                                 LNS.BooleanQuery s_query = new LNS.BooleanQuery ();
1267
1268                                 if (part.SearchFullText) {
1269                                         LNS.Query subquery;
1270                                         subquery = StringToQuery ("Text", part.Text, term_list);
1271                                         if (subquery != null)
1272                                                 p_query.Add (subquery, false, false);
1273
1274                                         // FIXME: HotText is ignored for now!
1275                                         // subquery = StringToQuery ("HotText", part.Text);
1276                                         // if (subquery != null)
1277                                         //    p_query.Add (subquery, false, false);
1278                                 }
1279
1280                                 if (part.SearchTextProperties) {
1281                                         LNS.Query subquery;
1282                                         subquery = StringToQuery ("PropertyText", part.Text, term_list);
1283                                         if (subquery != null) {
1284                                                 p_query.Add (subquery, false, false);
1285                                                 // Properties can live in either index
1286                                                 if (! only_build_primary_query)
1287                                                         s_query.Add (subquery.Clone () as LNS.Query, false, false);
1288                                         }
1289
1290                                         Term term;
1291                                         term = new Term ("PropertyKeyword", part.Text.ToLower ()); // make sure text is lowercased
1292                                         // FIXME: terms are already added in term_list. But they may have been tokenized
1293                                         // The term here is non-tokenized version. Should this be added to term_list ?
1294                                         // term_list is used to calculate scores
1295                                         if (term_list != null)
1296                                                 term_list.Add (term);
1297                                         subquery = new LNS.TermQuery (term);
1298                                         p_query.Add (subquery, false, false);
1299                                         // Properties can live in either index
1300                                         if (! only_build_primary_query)
1301                                                 s_query.Add (subquery.Clone () as LNS.Query, false, false);
1302                                 }
1303
1304                                 primary_query = p_query;
1305                                 if (! only_build_primary_query)
1306                                         secondary_query = s_query;
1307
1308                                 return;
1309                         }
1310
1311                         if (abstract_part is QueryPart_Wildcard) {
1312                                 QueryPart_Wildcard part = (QueryPart_Wildcard) abstract_part;
1313
1314                                 LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
1315                                 LNS.BooleanQuery s_query = new LNS.BooleanQuery ();
1316
1317                                 Term term;
1318                                 LNS.Query subquery;
1319
1320                                 // Lower case the terms for searching
1321                                 string query_string_lower = part.QueryString.ToLower ();
1322
1323                                 // Search text content
1324                                 term = new Term ("Text", query_string_lower);
1325                                 subquery = new LNS.WildcardQuery (term);
1326                                 p_query.Add (subquery, false, false);
1327                                 term_list.Add (term);
1328
1329                                 // Search text properties
1330                                 term = new Term ("PropertyText", query_string_lower);
1331                                 subquery = new LNS.WildcardQuery (term);
1332                                 p_query.Add (subquery, false, false);
1333                                 // Properties can live in either index
1334                                 if (! only_build_primary_query)
1335                                         s_query.Add (subquery.Clone () as LNS.Query, false, false);
1336                                 term_list.Add (term);
1337
1338                                 // Search property keywords
1339                                 term = new Term ("PropertyKeyword", query_string_lower);
1340                                 term_list.Add (term);
1341                                 subquery = new LNS.WildcardQuery (term);
1342                                 p_query.Add (subquery, false, false);
1343                                 // Properties can live in either index
1344                                 if (! only_build_primary_query)
1345                                         s_query.Add (subquery.Clone () as LNS.Query, false, false);
1346
1347                                 primary_query = p_query;
1348                                 if (! only_build_primary_query)
1349                                         secondary_query = s_query;
1350
1351                                 return;
1352                         }
1353
1354                         if (abstract_part is QueryPart_Property) {
1355                                 QueryPart_Property part = (QueryPart_Property) abstract_part;
1356
1357                                 string field_name;
1358                                 if (part.Key == QueryPart_Property.AllProperties)
1359                                         field_name = TypeToWildcardField (part.Type);
1360                                 else
1361                                         field_name = PropertyToFieldName (part.Type, part.Key);
1362
1363                                 if (part.Type == PropertyType.Text)
1364                                         primary_query = StringToQuery (field_name, part.Value, term_list);
1365                                 else {
1366                                         Term term;
1367                                         term = new Term (field_name, part.Value.ToLower ());
1368                                         if (term_list != null)
1369                                                 term_list.Add (term);
1370                                         primary_query = new LNS.TermQuery (term);
1371                                 }
1372
1373                                 // Properties can live in either index
1374                                 if (! only_build_primary_query && primary_query != null)
1375                                         secondary_query = primary_query.Clone () as LNS.Query;
1376
1377                                 return;
1378                         }
1379
1380                         if (abstract_part is QueryPart_DateRange) {
1381
1382                                 QueryPart_DateRange part = (QueryPart_DateRange) abstract_part;
1383
1384                                 primary_query = GetDateRangeQuery (part, out hit_filter);
1385                                 // Date properties can live in either index
1386                                 if (! only_build_primary_query && primary_query != null)
1387                                         secondary_query = primary_query.Clone () as LNS.Query;
1388
1389                                 // If this is a prohibited part, invert our hit filter.
1390                                 if (part.Logic == QueryPartLogic.Prohibited) {
1391                                         NotHitFilter nhf;
1392                                         nhf = new NotHitFilter (hit_filter);
1393                                         hit_filter = new HitFilter (nhf.HitFilter);
1394                                 }
1395
1396                                 return;
1397                         }
1398
1399                         if (abstract_part is QueryPart_Or) {
1400                                 QueryPart_Or part = (QueryPart_Or) abstract_part;
1401
1402                                 // Assemble a new BooleanQuery combining all of the sub-parts.
1403                                 LNS.BooleanQuery p_query;
1404                                 p_query = new LNS.BooleanQuery ();
1405
1406                                 LNS.BooleanQuery s_query = null;
1407                                 if (! only_build_primary_query)
1408                                         s_query = new LNS.BooleanQuery ();
1409
1410                                 primary_query = p_query;
1411                                 secondary_query = s_query;
1412
1413                                 OrHitFilter or_hit_filter = null;
1414
1415                                 foreach (QueryPart  sub_part in part.SubParts) {
1416                                         LNS.Query p_subq, s_subq;
1417                                         HitFilter sub_hit_filter; // FIXME: This is (and must be) ignored
1418                                         // FIXME: Any subpart in an OR which has a hit filter won't work
1419                                         // correctly, because we can't tell which part of an OR we matched
1420                                         // against to filter correctly.  This affects date range queries.
1421                                         QueryPartToQuery (sub_part, only_build_primary_query,
1422                                                           term_list,
1423                                                           out p_subq, out s_subq, out sub_hit_filter);
1424                                         if (p_subq != null)
1425                                                 p_query.Add (p_subq, false, false);
1426                                         if (s_subq != null)
1427                                                 s_query.Add (s_subq, false, false);
1428                                         if (sub_hit_filter != null) {
1429                                                 if (or_hit_filter == null)
1430                                                         or_hit_filter = new OrHitFilter ();
1431                                                 or_hit_filter.Add (sub_hit_filter);
1432                                         }
1433                                 }
1434
1435                                 if (or_hit_filter != null)
1436                                         hit_filter = new HitFilter (or_hit_filter.HitFilter);
1437
1438                                 return;
1439                         }
1440
1441                         throw new Exception ("Unhandled QueryPart type! " + abstract_part.ToString ());
1442                 }
1443
1444                 static protected LNS.Query UriQuery (string field_name, Uri uri)
1445                 {
1446                         return new LNS.TermQuery (new Term (field_name, UriFu.UriToEscapedString (uri)));
1447                 }
1448
1449                 static protected LNS.Query UriQuery (string field_name, ICollection uri_list)
1450                 {
1451                         return UriQuery (field_name, uri_list, null);
1452                 }
1453
1454                 static protected LNS.Query UriQuery (string field_name, ICollection uri_list, LNS.Query extra_requirement)
1455                 {
1456                         if (uri_list.Count == 0)
1457                                 return null;
1458
1459                         int max_clauses;
1460                         max_clauses = LNS.BooleanQuery.GetMaxClauseCount ();
1461
1462                         int N;
1463                         N = 1 + (uri_list.Count - 1) / max_clauses;
1464
1465                         LNS.BooleanQuery top_query;
1466                         top_query = new LNS.BooleanQuery ();
1467
1468                         int cursor = 0;
1469                         if (extra_requirement != null) {
1470                                 top_query.Add (extra_requirement, true, false);
1471                                 ++cursor;
1472                         }
1473
1474                         ArrayList bottom_queries = null;
1475
1476                         if (N > 1) {
1477                                 bottom_queries = new ArrayList ();
1478                                 for (int i = 0; i < N; ++i) {
1479                                         LNS.BooleanQuery bq;
1480                                         bq = new LNS.BooleanQuery ();
1481                                         bottom_queries.Add (bq);
1482                                         top_query.Add (bq, false, false);
1483                                 }
1484                         }
1485
1486                         foreach (Uri uri in uri_list) {
1487                                 LNS.Query subquery;
1488                                 subquery = UriQuery (field_name, uri);
1489
1490                                 LNS.BooleanQuery target;
1491                                 if (N == 1)
1492                                         target = top_query;
1493                                 else {
1494                                         target = (LNS.BooleanQuery) bottom_queries [cursor];
1495                                         ++cursor;
1496                                         if (cursor >= N)
1497                                                 cursor = 0;
1498                                 }
1499
1500                                 target.Add (subquery, false, false);
1501                         }
1502
1503                         return top_query;
1504                 }
1505
1506                 ///////////////////////////////////////////////////////////////////////////////////
1507
1508                 public int SegmentCount {
1509                         get {
1510                                 DirectoryInfo dir_info;
1511                                 int p_count = 0, s_count = 0;
1512
1513                                 dir_info = new DirectoryInfo (PrimaryIndexDirectory);
1514                                 foreach (FileInfo file_info in dir_info.GetFiles ())
1515                                         if (file_info.Extension == ".cfs")
1516                                                 ++p_count;
1517
1518                                 dir_info = new DirectoryInfo (SecondaryIndexDirectory);
1519                                 foreach (FileInfo file_info in dir_info.GetFiles ())
1520                                         if (file_info.Extension == ".cfs")
1521                                                 ++s_count;
1522
1523                                 return p_count > s_count ? p_count : s_count;
1524                         }
1525                 }
1526
1527                 ///////////////////////////////////////////////////////////////////////////////////
1528
1529                 // Cache IndexReaders on a per-Lucene index basis, since they
1530                 // are extremely expensive to create.  Note that using this
1531                 // only makes sense in situations where the index only
1532                 // possibly might change from underneath us, but most of the
1533                 // time probably won't.  This means it makes sense to do
1534                 // this in LuceneQueryingDriver.cs, but it doesn't in
1535                 // LuceneIndexingDriver.cs.
1536
1537                 private class ReaderAndVersion {
1538
1539                         public IndexReader Reader;
1540                         public long Version;
1541                         public int Refcount;
1542
1543                         public ReaderAndVersion (IndexReader reader, long version)
1544                         {
1545                                 this.Reader = reader;
1546                                 this.Version = version;
1547                                 this.Refcount = 1;
1548                         }
1549                 }
1550
1551                 static private Hashtable directory_rav_map = new Hashtable ();
1552                 static private Hashtable reader_rav_map = new Hashtable ();
1553
1554                 static public LNS.IndexSearcher GetSearcher (Lucene.Net.Store.Directory directory)
1555                 {
1556                         IndexReader reader = GetReader (directory);
1557
1558                         return new LNS.IndexSearcher (reader);
1559                 }
1560
1561                 static public IndexReader GetReader (Lucene.Net.Store.Directory directory)
1562                 {
1563                         IndexReader reader;
1564                         long version;
1565
1566                         lock (reader_rav_map) {
1567                                 ReaderAndVersion rav = (ReaderAndVersion) directory_rav_map [directory];
1568
1569                                 if (rav == null) {
1570                                         version = IndexReader.GetCurrentVersion (directory);
1571                                         reader = IndexReader.Open (directory);
1572
1573                                         rav = new ReaderAndVersion (reader, version);
1574
1575                                         directory_rav_map [directory] = rav;
1576                                         reader_rav_map [reader] = rav;
1577
1578                                         return reader;
1579                                 }
1580
1581                                 version = IndexReader.GetCurrentVersion (directory);
1582
1583                                 if (version != rav.Version) {
1584                                         reader = IndexReader.Open (directory);
1585
1586                                         rav = new ReaderAndVersion (reader, version);
1587
1588                                         directory_rav_map [directory] = rav;
1589                                         reader_rav_map [reader] = rav;
1590                                 } else {
1591                                         rav.Refcount++;
1592                                 }
1593
1594                                 return rav.Reader;
1595                         }
1596                 }
1597
1598                 static private void UnrefReaderAndVersion_Unlocked (ReaderAndVersion rav)
1599                 {
1600                         rav.Refcount--;
1601
1602                         if (rav.Refcount == 0) {
1603                                 reader_rav_map.Remove (rav.Reader);
1604                                 directory_rav_map.Remove (rav.Reader.Directory ());
1605                                 rav.Reader.Close ();
1606                         }
1607                 }
1608
1609                 static public void ReleaseReader (IndexReader reader)
1610                 {
1611                         lock (reader_rav_map) {
1612                                 ReaderAndVersion rav = (ReaderAndVersion) reader_rav_map [reader];
1613
1614                                 if (rav != null)
1615                                         UnrefReaderAndVersion_Unlocked (rav);
1616                                 else
1617                                         reader.Close ();
1618                         }
1619                 }
1620
1621                 static public void ReleaseSearcher (LNS.IndexSearcher searcher)
1622                 {
1623                         IndexReader reader = searcher.GetIndexReader ();
1624
1625                         searcher.Close ();
1626                         ReleaseReader (reader);
1627                 }
1628
1629                 ///////////////////////////////////////////////////////////////////////////////////
1630
1631                 //
1632                 // Various ways to grab lots of hits at once.
1633                 // These should never be used for querying, only for utility
1634                 // functions.
1635                 //
1636
1637                 public int GetBlockOfHits (int cookie,
1638                                            Hit [] block_of_hits)
1639                 {
1640                         IndexReader primary_reader;
1641                         IndexReader secondary_reader;
1642                         primary_reader = GetReader (PrimaryStore);
1643                         secondary_reader = GetReader (SecondaryStore);
1644
1645                         int request_size;
1646                         request_size = block_of_hits.Length;
1647                         if (request_size > primary_reader.NumDocs ())
1648                                 request_size = primary_reader.NumDocs ();
1649
1650                         int max_doc;
1651                         max_doc = primary_reader.MaxDoc ();
1652
1653                         if (cookie < 0) {
1654                                 Random random;
1655                                 random = new Random ();
1656                                 cookie = random.Next (max_doc);
1657                         }
1658
1659                         int original_cookie;
1660                         original_cookie = cookie;
1661
1662                         Hashtable primary_docs, secondary_docs;
1663                         primary_docs = UriFu.NewHashtable ();
1664                         secondary_docs = UriFu.NewHashtable ();
1665
1666                         // Load the primary documents
1667                         for (int i = 0; i < request_size; ++i) {
1668
1669                                 if (! primary_reader.IsDeleted (cookie)) {
1670                                         Document doc;
1671                                         doc = primary_reader.Document (cookie);
1672                                         primary_docs [GetUriFromDocument (doc)] = doc;
1673                                 }
1674
1675                                 ++cookie;
1676                                 if (cookie >= max_doc) // wrap around
1677                                         cookie = 0;
1678
1679                                 // If we somehow end up back where we started,
1680                                 // give up.
1681                                 if (cookie == original_cookie)
1682                                         break;
1683                         }
1684
1685                         // If necessary, load the secondary documents
1686                         if (secondary_reader != null) {
1687                                 LNS.IndexSearcher searcher;
1688                                 searcher = new LNS.IndexSearcher (secondary_reader);
1689
1690                                 LNS.Query uri_query;
1691                                 uri_query = UriQuery ("Uri", primary_docs.Keys);
1692
1693                                 LNS.Hits hits;
1694                                 hits = searcher.Search (uri_query);
1695                                 for (int i = 0; i < hits.Length (); ++i) {
1696                                         Document doc;
1697                                         doc = hits.Doc (i);
1698                                         secondary_docs [GetUriFromDocument (doc)] = doc;
1699                                 }
1700
1701                                 searcher.Close ();
1702                         }
1703
1704                         ReleaseReader (primary_reader);
1705                         ReleaseReader (secondary_reader);
1706
1707                         // Now assemble the hits
1708                         int j = 0;
1709                         foreach (Uri uri in primary_docs.Keys) {
1710                                 Document primary_doc, secondary_doc;
1711                                 primary_doc = primary_docs [uri] as Document;
1712                                 secondary_doc = secondary_docs [uri] as Document;
1713
1714                                 Hit hit;
1715                                 hit = DocumentToHit (primary_doc);
1716                                 if (secondary_doc != null)
1717                                         AddPropertiesToHit (hit, secondary_doc, false);
1718
1719                                 block_of_hits [j] = hit;
1720                                 ++j;
1721                         }
1722
1723                         // null-pad the array, if necessary
1724                         for (; j < block_of_hits.Length; ++j)
1725                                 block_of_hits [j] = null;
1726
1727
1728                         // Return the new cookie
1729                         return cookie;
1730                 }
1731
1732                 // For a large index, this will be very slow and will consume
1733                 // a lot of memory.  Don't call it without a good reason!
1734                 // We return a hashtable indexed by Uri.
1735                 public Hashtable GetAllHitsByUri ()
1736                 {
1737                         Hashtable all_hits;
1738                         all_hits = UriFu.NewHashtable ();
1739
1740                         IndexReader primary_reader;
1741                         IndexReader secondary_reader;
1742                         primary_reader = GetReader (PrimaryStore);
1743                         secondary_reader = GetReader (SecondaryStore);
1744
1745                         // Load everything from the primary index
1746                         int max_doc;
1747                         max_doc = primary_reader.MaxDoc ();
1748                         for (int i = 0; i < max_doc; ++i) {
1749
1750                                 if (primary_reader.IsDeleted (i))
1751                                         continue;
1752
1753                                 Document doc;
1754                                 doc = primary_reader.Document (i);
1755
1756                                 Hit hit;
1757                                 hit = DocumentToHit (doc);
1758                                 all_hits [hit.Uri] = hit;
1759                         }
1760
1761                         // Now add in everything from the secondary index, if it exists
1762                         if (secondary_reader != null) {
1763                                 max_doc = secondary_reader.MaxDoc ();
1764                                 for (int i = 0; i < max_doc; ++i) {
1765
1766                                         if (secondary_reader.IsDeleted (i))
1767                                                 continue;
1768
1769                                         Document doc;
1770                                         doc = secondary_reader.Document (i);
1771
1772                                         Uri uri;
1773                                         uri = GetUriFromDocument (doc);
1774
1775                                         Hit hit;
1776                                         hit = (Hit) all_hits [uri];
1777                                         if (hit != null)
1778                                                 AddPropertiesToHit (hit, doc, false);
1779                                 }
1780                         }
1781
1782                         ReleaseReader (primary_reader);
1783                         ReleaseReader (secondary_reader);
1784
1785                         return all_hits;
1786                 }
1787         }
1788 }