4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 using System
.Collections
;
36 using Hit
= Beagle
.Hit
;
38 using Lucene
.Net
.Index
;
39 using Lucene
.Net
.Search
;
40 using Lucene
.Net
.Documents
;
// Orders Hit objects by the string form of their Uri so that a dump of an
// index always comes out in the same order.
public class HitByUriComparer : IComparer {

	// Compares two Hit objects by Uri.ToString ().
	//   a, b    - the objects to compare; both must be Hit instances
	//             (anything else fails the cast with InvalidCastException).
	//   returns - negative, zero or positive as a sorts before, equal to
	//             or after b.
	public int Compare (object a, object b)
	{
		// All of this mapping to and from strings is dreadful.
		string uri_a = ((Hit) a).Uri.ToString ();
		string uri_b = ((Hit) b).Uri.ToString ();

		// Uris are identifiers, not prose: compare them ordinally so the
		// resulting order does not change with the current culture.
		return String.CompareOrdinal (uri_a, uri_b);
	}
}
// Builds a path string for a hit by walking up its chain of parent hits.
//   all_hits_by_uri - table used to look up the parent Hit; it is indexed
//                     with the value of the _private:ParentDirUri property.
//   hit             - the hit whose path is wanted.
// The beagle:ExactFilename property supplies the name of this hit, and the
// method recurses on the parent hit, feeding the result to Path.Combine.
// NOTE(review): the statement guarded by the null check and the remaining
// argument of Path.Combine are not visible in this view -- confirm against
// the full file.
53 static string RemapUriToPath (Hashtable all_hits_by_uri
, Hit hit
)
55 string exact_name
= hit
.GetFirstProperty ("beagle:ExactFilename");
56 string parent_uri_str
= hit
.GetFirstProperty ("_private:ParentDirUri");
// No parent property: presumably this is where the recursion stops (TODO confirm).
58 if (parent_uri_str
== null)
// Recurse on the parent hit found in the table under parent_uri_str.
61 return Path
.Combine (RemapUriToPath (all_hits_by_uri
, (Hit
) all_hits_by_uri
[parent_uri_str
]),
// Writes every hit of one index to the console and returns the number of
// hits (all_hits.Count).
//   index_name         - handed to LuceneQueryingDriver and echoed in the output.
//   only_dump_the_urls - when true, a hit is printed as an index-name/Uri pair.
// NOTE(review): some lines of this method are not visible in this view (for
// example the declarations of all_hits and props, and the end of the
// only_dump_the_urls branch) -- confirm the details against the full file.
65 static int DumpOneIndex_Metadata (string index_name
, bool only_dump_the_urls
)
67 Console
.WriteLine (); // a visual cue that something has changed
68 LuceneQueryingDriver driver
;
// NOTE(review): the meaning of the -1 and true arguments is defined by
// LuceneQueryingDriver, which is not shown here.
69 driver
= new LuceneQueryingDriver (index_name
, -1, true);
71 Hashtable all_hits_by_uri
;
72 all_hits_by_uri
= driver
.GetAllHitsByUri ();
// Copy the table values into a list so that they can be sorted below.
75 all_hits
= new ArrayList (all_hits_by_uri
.Values
);
// For the file system index, overwrite each hit Uri with a file Uri built
// from the path that RemapUriToPath reconstructs.
77 if (index_name
== "FileSystemIndex") // A hard-wired hack
78 foreach (Hit hit
in all_hits
)
79 hit
.Uri
= UriFu
.PathToFileUri (RemapUriToPath (all_hits_by_uri
, hit
));
// Sort by Uri string so the output order is repeatable.
81 all_hits
.Sort (new HitByUriComparer ());
83 foreach (Hit hit
in all_hits
) {
// Short form: one line per hit.
// NOTE(review): presumably the long form below is skipped in this case;
// the end of this branch is not visible here.
85 if (only_dump_the_urls
) {
86 Console
.WriteLine ("{0}: {1}", index_name
, hit
.Uri
);
// Long form: index name, Uri, optional parent Uri, mime type and type.
90 Console
.WriteLine (" Index: {0}", index_name
);
91 Console
.WriteLine (" Uri: {0}", hit
.Uri
);
92 if (hit
.ParentUri
!= null)
93 Console
.WriteLine ("Parent: {0}", hit
.ParentUri
);
94 Console
.WriteLine (" MimeT: {0}", hit
.MimeType
);
95 Console
.WriteLine (" Type: {0}", hit
.Type
);
// Work on a copy of the property collection.
98 props
= new ArrayList (hit
.Properties
);
// Properties whose key starts with _private: are not printed.
100 foreach (Property prop
in props
)
101 if (! prop
.Key
.StartsWith ("_private:"))
102 Console
.WriteLine (" Prop: {0} = '{1}'", prop
.Key
, prop
.Value
);
// Blank line after each hit.
104 Console
.WriteLine ();
107 return all_hits
.Count
;
// Term passed to IndexReader.Terms () as the starting point of the
// enumeration below. DumpIndexInformation assigns it a Term with field Text
// and empty text when the mode is TermFrequencies.
110 static Term initial_enum_term
;
111 // Dump the term frequencies: we do this via direct Lucene access.
// For each enumerated term whose field is Text, prints the index name, the
// term text and its document frequency, and counts the terms seen.
//   index_name - handed to LuceneQueryingDriver and echoed on every line.
// NOTE(review): the declarations of reader, term_enum and freq, the loop
// exit taken when Next () fails, and any cleanup of the reader are not
// visible in this view -- confirm against the full file.
112 static void DumpOneIndex_TermFrequencies (string index_name
)
114 LuceneQueryingDriver driver
;
115 driver
= new LuceneQueryingDriver (index_name
, -1, true);
// Open the primary store of the driver directly with Lucene.
118 reader
= IndexReader
.Open (driver
.PrimaryStore
);
121 term_enum
= reader
.Terms (initial_enum_term
);
123 int distinct_term_count
= 0;
127 // Terms are sorted first by field, then by text
128 // so all terms with a given field are adjacent in enumerations.
129 if (term_enum
.Term () != null) {
// Keep going only while the current term belongs to the Text field.
130 while (term_enum
.Term().Field() == "Text") {
132 freq
= term_enum
.DocFreq ();
134 Console
.WriteLine ("{0} {1} {2}", index_name
, term_enum
.Term ().Text (), freq
);
136 // FIXME: spew these as a count
137 ++distinct_term_count
;
// Advance to the next term; the statement run when there are no more
// terms is not visible here.
140 if (!term_enum
.Next ())
148 Console
.WriteLine ();
// One entry per index directory, as built by DumpIndexInformation. Entries
// compare by Name, which is what ArrayList.Sort () relies on there.
// NOTE(review): the Name and Count members are used elsewhere in this file
// but their declarations, and the constructor body, are not visible in this
// view.
151 public class IndexInfo
: IComparable
{
// Creates the entry for the index called name (body not visible here).
155 public IndexInfo (string name
)
// Orders two IndexInfo objects by Name using String.Compare.
// The cast fails with InvalidCastException when obj is some other type.
160 public int CompareTo (object obj
)
162 IndexInfo other
= (IndexInfo
) obj
;
163 return String
.Compare (this.Name
, other
.Name
);
// Dumps every index found under PathFinder.IndexDir.
//   mode        - selects what is dumped: Uris and Properties go through
//                 DumpOneIndex_Metadata; TermFrequencies seeds
//                 initial_enum_term for DumpOneIndex_TermFrequencies.
//   show_counts - together with set_counts, enables the FINAL COUNTS table.
// NOTE(review): the declaration of dir, the branch structure around the
// DumpOneIndex_TermFrequencies call and the place where set_counts becomes
// true are not visible in this view -- confirm against the full file.
167 static void DumpIndexInformation (Mode mode
, bool show_counts
)
169 ArrayList index_info_list
;
170 index_info_list
= new ArrayList ();
// Each subdirectory of the index directory is treated as one index.
173 dir
= new DirectoryInfo (PathFinder
.IndexDir
);
174 foreach (DirectoryInfo subdir
in dir
.GetDirectories ())
175 index_info_list
.Add (new IndexInfo (subdir
.Name
));
// Sorts through IndexInfo.CompareTo, that is by index name.
177 index_info_list
.Sort ();
179 bool set_counts
= false;
// Start the term enumeration at the first term of the Text field.
181 if (mode
== Mode
.TermFrequencies
)
182 initial_enum_term
= new Term ("Text", "");
184 foreach (IndexInfo info
in index_info_list
) {
185 if (mode
== Mode
.Uris
|| mode
== Mode
.Properties
) {
// The second argument is true only in Uris mode, which selects the
// short one-line-per-hit output.
186 info
.Count
= DumpOneIndex_Metadata (info
.Name
, mode
== Mode
.Uris
);
// NOTE(review): presumably reached only for the remaining mode; the
// else keyword is not visible here.
189 DumpOneIndex_TermFrequencies (info
.Name
);
// Summary table: the count right-aligned in 7 columns, then the index name.
193 if (show_counts
&& set_counts
) {
194 Console
.WriteLine ();
195 Console
.WriteLine ("FINAL COUNTS");
197 foreach (IndexInfo info
in index_info_list
)
198 Console
.WriteLine ("{0} {1}", info
.Count
.ToString ().PadLeft (7), info
.Name
);
// IQueryResult implementation declared inside this tool.
// NOTE(review): none of the method bodies are visible in this view; going
// by the class name they are presumably empty -- confirm against the full
// file before relying on that.
202 class DummyQueryResult
: IQueryResult
{
// Receives a collection of hits.
203 public void Add (ICollection hits
)
// Receives a collection of hits together with a total result count.
207 public void Add (ICollection hits
, int total_results
)
// Receives a collection of hits to be removed.
211 public void Subtract (ICollection hits
)
// Prints what the index knows about a single file or directory.
//   path     - file or directory to look up; a message is printed when it
//              is neither an existing file nor an existing directory.
//   indexdir - index directory; when null it defaults to the
//              FileSystemIndex directory under PathFinder.IndexDir.
// The stored file attributes are read first, then the index is queried by
// uid Uri and, after that, by the file Uri string of path.
// NOTE(review): the early exits after the error messages, the declaration
// and closing of reader, and the guard around the no-information message
// are not visible in this view -- confirm against the full file.
216 static void DumpFileIndexInformation (string path
, string indexdir
)
218 //Uri uri = UriFu.PathToFileUri (path);
219 //Console.WriteLine ("Dumping information about:" + uri.AbsolutePath);
220 //path = uri.AbsolutePath;
221 if ((! File
.Exists (path
)) && (! Directory
.Exists (path
))) {
222 Console
.WriteLine ("No such file or directory:" + path
);
226 if (indexdir
== null)
227 // default is ~/.beagle/Indexes/FileSystemIndex
228 indexdir
= Path
.Combine (PathFinder
.IndexDir
, "FileSystemIndex");
229 if (! Directory
.Exists (indexdir
)) {
230 Console
.WriteLine ("Index:{0} doesnt exist.", indexdir
);
// The first line of the fingerprint file inside the index directory is
// passed on to FileAttributesStore_Mixed below.
236 reader
= new StreamReader (Path
.Combine (indexdir
, "fingerprint"));
237 string fingerprint
= reader
.ReadLine ();
239 //Console.WriteLine ("Read fingerprint:" + fingerprint);
// Look up the stored attributes for path.
242 FileAttributesStore fa_store
= new FileAttributesStore (new FileAttributesStore_Mixed (indexdir
, fingerprint
));
243 Beagle
.Daemon
.FileAttributes attr
= fa_store
.Read (path
);
// NOTE(review): presumably printed only when attr is null; the condition
// is not visible here.
245 Console
.WriteLine ("No information about this file in index. Ignoring.");
// The Uri under which the index is queried first: uid: plus the short
// string form of the unique id from the attributes.
248 string uri_string
= "uid:" + GuidFu
.ToShortString (attr
.UniqueId
);
249 Console
.WriteLine ("Uri = " + uri_string
);
250 //Console.WriteLine ("FilterName:" + attr.FilterName);
251 Console
.WriteLine ("LastAttrTime:" + attr
.LastAttrTime
);
252 Console
.WriteLine ("LastWriteTime:" + attr
.LastWriteTime
);
254 LuceneQueryingDriver driver
;
255 driver
= new LuceneQueryingDriver (indexdir
, -1, true);
258 // first try for the Uri:"uid:xxxxxxxxxxxxxxx"
259 Lucene
.Net
.Search
.Query query
= new TermQuery(new Term("Uri", uri_string
));
// NOTE(review): the statement run when this first query succeeds is not
// visible here.
260 if (DoQuery (driver
, query
))
263 // else query by path - this is for static indexes
264 path
= UriFu
.PathToFileUriString (path
);
265 Console
.WriteLine ("Querying by:[" + path
+ "]");
266 query
= new TermQuery(new Term("Uri", path
));
267 DoQuery (driver
, query
);
// Runs query against the primary and the secondary store of driver, prints
// the two hit counts, and for each store that returned exactly one hit
// prints every field of that document: its name and value, followed by
// whether it is stored, searchable and tokenized.
//   driver - supplies PrimaryStore and SecondaryStore.
//   query  - the Lucene query to run on both stores.
// NOTE(review): the return statements are not visible in this view; the
// last visible condition tests whether either store produced any hit, so
// the result presumably reports that -- confirm against the full file.
// NOTE(review): the searchers are released only at the end, so an exception
// thrown while printing would skip the ReleaseSearcher calls.
271 static bool DoQuery (LuceneQueryingDriver driver
, Lucene
.Net
.Search
.Query query
)
273 IndexSearcher primary_searcher
= LuceneCommon
.GetSearcher (driver
.PrimaryStore
);
274 IndexSearcher secondary_searcher
= LuceneCommon
.GetSearcher (driver
.SecondaryStore
);
276 Hits primary_hits
= primary_searcher
.Search(query
);
277 Hits secondary_hits
= secondary_searcher
.Search (query
);
278 Console
.WriteLine ("{0} hits from primary store; {1} hits from secondary store", primary_hits
.Length (), secondary_hits
.Length ());
280 Document primary_doc
, secondary_doc
;
281 // there should be exactly one primary hit and 0/1 secondary hit
282 if (primary_hits
.Length () == 1) {
283 primary_doc
= primary_hits
.Doc (0);
// Banner for the primary document (the call it belongs to is not visible).
285 "------------[ Immutable data ]------------");
286 foreach (Field f
in primary_doc
.Fields ()) {
288 String name
= f
.Name ();
289 String val
= f
.StringValue ();
290 bool stored
= f
.IsStored ();
// NOTE(review): val[0] is read without a length or null check, so an empty
// or null field value would throw here -- confirm that cannot happen.
291 bool searchable
= (val
[0] == 's');
292 bool tokenized
= f
.IsTokenized();
// For names that start with prop: and have at least 7 characters, the
// character at index 5 decides the flag instead of IsTokenized ().
293 if (name
.Length
>= 7 && name
.StartsWith ("prop:"))
294 tokenized
= (name
[5] != 't');
// NOTE(review): boost is not used in any line visible here.
295 float boost
= f
.GetBoost();
296 Console
.WriteLine ("{0,-30} = [{1}]", name
, val
);
// NOTE(review): the argument for placeholder 0 is not visible in this view.
297 Console
.WriteLine ("{0,-32} ({1}stored, {2} searchable, {3} tokenized)",
299 (stored
? "" : "un"),
300 (searchable
? "" : "not"),
301 (tokenized
? "" : "not"));
// Same dump for the secondary document.
306 if (secondary_hits
.Length () == 1) {
307 secondary_doc
= secondary_hits
.Doc (0);
309 "------------[ Mutable data ]-----------");
310 foreach (Field f
in secondary_doc
.Fields ()) {
312 String name
= f
.Name ();
313 String val
= f
.StringValue ();
314 bool stored
= f
.IsStored ();
// NOTE(review): same unguarded val[0] access as in the primary loop.
315 bool searchable
= (val
[0] == 's');
316 bool tokenized
= f
.IsTokenized();
317 if (name
.Length
>= 7 && name
.StartsWith ("prop:"))
318 tokenized
= (name
[5] != 't');
// NOTE(review): boost is not used in any line visible here.
319 float boost
= f
.GetBoost();
321 Console
.WriteLine ("{0,-30} = [{1}]", name
, val
);
322 Console
.WriteLine ("{0,-32} ({1}stored, {2} searchable, {3} tokenized)",
324 (stored
? "" : "un"),
325 (searchable
? "" : "not"),
326 (tokenized
? "" : "not"));
// Hand both searchers back to LuceneCommon.
330 LuceneCommon
.ReleaseSearcher (primary_searcher
);
331 LuceneCommon
.ReleaseSearcher (secondary_searcher
);
// True when at least one of the two stores returned a hit.
333 if (primary_hits
.Length () != 0 || secondary_hits
.Length () != 0)
// Entry point: reads the command line options and then dumps either all
// indexes (DumpIndexInformation) or the index data of one file
// (DumpFileIndexInformation).
//   args - command line arguments; the recognised options are listed in
//          the usage text printed below.
// Defaults: mode is Uris, counts are shown, indexdir is null.
// NOTE(review): most of the option switch (several case labels, the
// declaration of file and the condition choosing between the two dump
// calls) is not visible in this view -- confirm against the full file.
346 static void Main (string [] args
)
348 Mode mode
= Mode
.Uris
;
349 bool show_counts
= true;
351 string indexdir
= null;
353 foreach (string arg
in args
) {
// Usage text; the process exits with status 0 right after printing it.
358 Console
.WriteLine (@"
359 beagle-dump-index [options] [ [--indexdir=dir] file]
361 --uris Dump all Uris (default)
362 --properties Dump all properties
363 --term-frequencies Dump term frequencies
365 --show-counts Show index count totals (default)
366 --hide-counts Hide index count totals
368 --indexdir=<index directory>
369 Absolute path of the directory storing the index
370 e.g. /home/user/.beagle/Indexes/FileSystemIndex
371 file Get information in index about this file or directory
373 --help What you just did");
374 Environment
.Exit (0);
382 mode
= Mode
.Properties
;
385 case "--term-frequencies":
386 mode
= Mode
.TermFrequencies
;
389 case "--hide-counts":
393 case "--show-counts":
// Remove (0, 11) strips the 11 characters of the --indexdir= prefix,
// leaving only the directory.
398 if (arg
.StartsWith ("--indexdir="))
399 indexdir
= arg
.Remove (0, 11);
// Dump of all indexes under the index directory.
407 DumpIndexInformation (mode
, show_counts
);
// Dump for one file, using the optional index directory.
409 DumpFileIndexInformation (file
, indexdir
);