4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
30 using System
.Reflection
;
33 using System
.Xml
.Serialization
;
37 namespace Beagle
.Daemon
{
39 public class FilterFactory
{
// Class-wide debug switch; initialised to true in this listing.
// NOTE(review): presumably guards the Logger.Log.Debug calls in this class, but
// the guarding statements are not visible in this listing -- confirm.
41 static private bool Debug
= true;
// Static constructor: works out the filter search path and scans the
// directories on it for filter assemblies.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so braces and possibly some statements are not visible here.
43 static FilterFactory ()
// The search path is taken from the BEAGLE_FILTER_PATH environment variable.
45 string path
= Environment
.GetEnvironmentVariable ("BEAGLE_FILTER_PATH");
// Unset or empty: use PathFinder.FilterDir as the whole path.
47 if (path
== null || path
== "")
48 path
= PathFinder
.FilterDir
;
// A trailing ':' means PathFinder.FilterDir is appended to the given path.
49 else if (path
[path
.Length
-1] == ':')
50 path
+= PathFinder
.FilterDir
;
// Table consulted below before scanning a directory.
// NOTE(review): the statement that adds entries to 'seen' is not visible in
// this listing -- confirm that each scanned directory is recorded.
52 Hashtable seen
= new Hashtable ();
// The path is split on ':'; each entry not yet in 'seen' is scanned.
54 foreach (string dir
in path
.Split (':')) {
55 if (! seen
.Contains (dir
))
56 ScanDirectoryForAssemblies (dir
);
61 /////////////////////////////////////////////////////////////////////////
// Instantiates a filter for every registered flavor that matches the given
// uri / extension / mime_type and returns the collection of those filters.
// The callers in this file pass null for the criteria they do not have.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip); the 'try' keyword and the catch body are among them.
64 static private ICollection
CreateFilters (Uri uri
, string extension
, string mime_type
)
// Matched filters are collected in a table keyed by flavor, created by
// FilterFlavor.NewHashtable ().
66 Hashtable matched_filters_by_flavor
= FilterFlavor
.NewHashtable ();
// Walk every flavor registered in filter_types_by_flavor.
68 foreach (FilterFlavor flavor
in filter_types_by_flavor
.Keys
) {
69 if (flavor
.IsMatch (uri
, extension
, mime_type
)) {
70 Filter matched_filter
= null;
// Create a fresh instance of the Type registered for this flavor.
73 matched_filter
= (Filter
) Activator
.CreateInstance ((Type
) filter_types_by_flavor
[flavor
]);
// Copy the flavor's mime type and extension onto the new filter when the
// flavor specifies them.
75 if (flavor
.MimeType
!= null)
76 matched_filter
.MimeType
= flavor
.MimeType
;
77 if (flavor
.Extension
!= null)
78 matched_filter
.Extension
= flavor
.Extension
;
// NOTE(review): the handler body is not visible in this listing, so what
// happens after a failed instantiation cannot be determined from here.
80 } catch (Exception e
) {
83 matched_filters_by_flavor
[flavor
] = matched_filter
;
// Second pass over the matches: log each filter together with its flavor's weight.
87 foreach (DictionaryEntry entry
in matched_filters_by_flavor
) {
88 FilterFlavor flav
= (FilterFlavor
) entry
.Key
;
89 Filter filter
= (Filter
) entry
.Value
;
92 Logger
.Log
.Debug ("Found matching filter: {0}, Weight: {1}", filter
, flav
.Weight
);
// Only the filter instances (the table's values) are handed back.
95 return matched_filters_by_flavor
.Values
;
// Returns the version recorded in filter_versions_by_name for filter_name
// when an entry exists.
// NOTE(review): the value returned when no entry exists is not visible in
// this listing -- confirm.
98 static public int GetFilterVersion (string filter_name
)
100 if (filter_versions_by_name
.Contains (filter_name
)) {
101 return (int) filter_versions_by_name
[filter_name
];
107 /////////////////////////////////////////////////////////////////////////
// Convenience wrapper: creates filters by matching on mime_type only
// (uri and extension are passed to CreateFilters as null).
109 static public ICollection
CreateFiltersFromMimeType (string mime_type
)
111 return CreateFilters (null, null, mime_type
);
// Convenience wrapper: creates filters by matching on extension only
// (uri and mime type are passed to CreateFilters as null).
114 static public ICollection
CreateFilterFromExtension (string extension
)
116 return CreateFilters (null, extension
, null);
// Creates filters for a file path. The mime type is obtained from
// Beagle.Util.VFS.Mime.GetMimeType, the extension from Path.GetExtension,
// and the path is converted to a file Uri with UriFu.PathToFileUri; all
// three are then passed to CreateFilters.
119 static public ICollection
CreateFiltersFromPath (string path
)
121 string guessed_mime_type
= Beagle
.Util
.VFS
.Mime
.GetMimeType (path
);
122 string extension
= Path
.GetExtension (path
);
123 return CreateFilters (UriFu
.PathToFileUri (path
), extension
, guessed_mime_type
);
// Creates filters for a Uri. Two return paths are visible: one delegates to
// CreateFiltersFromPath with uri.LocalPath, the other calls CreateFilters
// with the uri alone (null extension and mime type).
// NOTE(review): the condition that selects between the two returns is not
// visible in this listing -- confirm.
126 static public ICollection
CreateFiltersFromUri (Uri uri
)
129 return CreateFiltersFromPath (uri
.LocalPath
);
131 return CreateFilters (uri
, null, null);
// Creates filters for an indexable. The path is the LocalPath of the
// indexable's ContentUri, the extension comes from that path, and the mime
// type is the one already stored on the indexable (no sniffing is done here).
134 static public ICollection
CreateFiltersFromIndexable (Indexable indexable
)
136 string path
= indexable
.ContentUri
.LocalPath
;
137 string extension
= Path
.GetExtension (path
);
138 string mime_type
= indexable
.MimeType
;
139 return CreateFilters (UriFu
.PathToFileUri (path
), extension
, mime_type
);
142 /////////////////////////////////////////////////////////////////////////
// Convenience overload: filters the file at 'path' with no mime type supplied
// (null is forwarded to the two-argument overload).
144 static public TextReader
FilterFile (string path
)
146 return FilterFile (path
, null);
// Filters the file at 'path' and produces a TextReader from the first
// candidate filter that opens it.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip); the declaration of 'reader', the return statements and the
// branch taken when no mime type can be determined are not visible here.
149 static public TextReader
FilterFile (string path
, string mime_type
)
// No mime type supplied: ask Beagle.Util.VFS.Mime for one based on the path.
151 if (mime_type
== null)
152 mime_type
= Beagle
.Util
.VFS
.Mime
.GetMimeType (path
);
// NOTE(review): the statement guarded by this check is not visible in this listing.
154 if (mime_type
== null)
// Candidate filters are matched on file Uri, extension and mime type together.
157 ICollection filters
= CreateFilters (UriFu
.PathToFileUri (path
), Path
.GetExtension (path
), mime_type
);
160 foreach (Filter candidate_filter
in filters
) {
162 Logger
.Log
.Debug ("Testing filter: {0}", candidate_filter
);
164 // Open the filter, and hook up the TextReader.
165 if (candidate_filter
.Open (path
)) {
166 reader
= candidate_filter
.GetTextReader ();
169 Logger
.Log
.Debug ("Successfully filtered {0} with {1}", path
, candidate_filter
);
// Logged when a candidate did not succeed; per the message, the next
// candidate is tried.
173 Logger
.Log
.Debug ("Unsuccessfully filtered {0} with {1}, falling back", path
, candidate_filter
);
// Decides whether an indexable should be run through a filter, based on its
// Filtering setting, NoContent flag, IsNonTransient flag and MimeType.
// NOTE(review): none of the return statements are visible in this listing,
// so the result of each check below must be confirmed against the full file.
180 static private bool ShouldWeFilterThis (Indexable indexable
)
// Check 1: filtering is set to Never, or the indexable has no content.
182 if (indexable
.Filtering
== IndexableFiltering
.Never
183 || indexable
.NoContent
)
// Check 2: filtering is set to Always.
186 if (indexable
.Filtering
== IndexableFiltering
.Always
)
189 // Our default behavior is to try to filter non-transient file
190 // indexable and indexables with a specific mime type attached.
191 if (indexable
.IsNonTransient
|| indexable
.MimeType
!= null)
// Main entry point for filtering an indexable. Selects candidate filters,
// tries them in turn, and on the first successful open copies the filter's
// properties and text readers onto the indexable and reports the filter
// through the 'filter' out parameter.
// text_cache may be null (the other overloads in this file pass null); it is
// only used when a candidate filter is in snippet mode.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip); the declarations of 'path' and 'succesful_open', every
// return statement and several if/else conditions are not visible here.
197 static public bool FilterIndexable (Indexable indexable
, TextCache text_cache
, out Filter filter
)
200 ICollection filters
= null;
// NOTE(review): the statement guarded by this AlreadyFiltered check is not visible.
202 if (indexable
.Filtering
== IndexableFiltering
.AlreadyFiltered
)
// Indexables that should not be filtered are marked as having no content.
205 if (! ShouldWeFilterThis (indexable
)) {
206 indexable
.NoContent
= true;
212 // First, figure out which filter we should use to deal with
215 // If a specific mime type is specified, try to index as that type.
216 if (indexable
.MimeType
!= null)
217 filters
= CreateFiltersFromMimeType (indexable
.MimeType
);
// Non-transient indexables have a local path taken from their ContentUri.
219 if (indexable
.IsNonTransient
) {
220 path
= indexable
.ContentUri
.LocalPath
;
222 // Otherwise sniff the mime-type from the file
223 if (indexable
.MimeType
== null)
224 indexable
.MimeType
= Beagle
.Util
.VFS
.Mime
.GetMimeType (path
);
// Nothing matched on mime type alone: match on Uri, extension and mime type.
226 if (filters
== null || filters
.Count
== 0) {
227 filters
= CreateFiltersFromIndexable (indexable
);
// Directories get a fixed mime type, are marked as content-less, and take
// the directory's last-write time as their timestamp.
230 if (Directory
.Exists (path
)) {
231 indexable
.MimeType
= "inode/directory";
232 indexable
.NoContent
= true;
233 indexable
.Timestamp
= Directory
.GetLastWriteTime (path
);
// Regular files only get their timestamp set from the last-write time.
234 } else if (File
.Exists (path
)) {
235 indexable
.Timestamp
= File
.GetLastWriteTime (path
);
237 Logger
.Log
.Warn ("No such file: {0}", path
);
242 // We don't know how to filter this, so there is nothing else to do.
243 if (filters
.Count
== 0) {
244 if (! indexable
.NoContent
) {
245 indexable
.NoContent
= true;
247 Logger
.Log
.Debug ("No filter for {0}", path
);
// Try each candidate filter in turn.
254 foreach (Filter candidate_filter
in filters
) {
256 Logger
.Log
.Debug ("Testing filter: {0}", candidate_filter
);
258 // Hook up the snippet writer.
259 if (candidate_filter
.SnippetMode
&& text_cache
!= null) {
// Text originals of non-transient indexables are marked self-cached in the
// text cache; otherwise, if the indexable asks for content caching, a cache
// writer is attached to the filter.
260 if (candidate_filter
.OriginalIsText
&& indexable
.IsNonTransient
) {
261 text_cache
.MarkAsSelfCached (indexable
.Uri
);
262 } else if (indexable
.CacheContent
) {
263 TextWriter writer
= text_cache
.GetWriter (indexable
.Uri
);
264 candidate_filter
.AttachSnippetWriter (writer
);
268 if (indexable
.Crawled
)
269 candidate_filter
.EnableCrawlMode ();
271 // Be extra paranoid: never delete the actual
272 // URI we are indexing.
273 if (indexable
.DeleteContent
&& indexable
.Uri
!= indexable
.ContentUri
)
274 candidate_filter
.DeleteContent
= indexable
.DeleteContent
;
276 // Set the filter's URI
277 candidate_filter
.Uri
= indexable
.Uri
;
279 // Open the filter, copy the file's properties to the indexable,
280 // and hook up the TextReaders.
// Three ways of opening are visible: by path, by the indexable's TextReader,
// and by the indexable's binary stream.
// NOTE(review): the condition guarding the path-based open and the 'else'
// before the binary-stream open are not visible in this listing.
284 succesful_open
= candidate_filter
.Open (path
);
285 else if (indexable
.GetTextReader () != null)
286 succesful_open
= candidate_filter
.Open (indexable
.GetTextReader ());
288 succesful_open
= candidate_filter
.Open (indexable
.GetBinaryStream ());
// On success the filter's properties and both text readers are transferred
// to the indexable, and the filter is reported to the caller.
290 if (succesful_open
) {
291 foreach (Property prop
in candidate_filter
.Properties
)
292 indexable
.AddProperty (prop
);
293 indexable
.SetTextReader (candidate_filter
.GetTextReader ());
294 indexable
.SetHotTextReader (candidate_filter
.GetHotTextReader ());
297 Logger
.Log
.Debug ("Successfully filtered {0} with {1}", path
, candidate_filter
);
299 filter
= candidate_filter
;
302 Logger
.Log
.Debug ("Unsuccessfully filtered {0} with {1}, falling back", path
, candidate_filter
);
307 Logger
.Log
.Debug ("None of the matching filters could process the file: {0}", path
);
// Overload without a text cache: forwards to the three-argument
// FilterIndexable with null as the text cache.
312 static public bool FilterIndexable (Indexable indexable
, out Filter filter
)
314 return FilterIndexable (indexable
, null, out filter
);
// Overload for callers that need neither a text cache nor the chosen filter:
// forwards to the three-argument FilterIndexable with a null text cache and
// discards the filter it reports.
317 static public bool FilterIndexable (Indexable indexable
)
319 Filter filter
= null;
321 return FilterIndexable (indexable
, null, out filter
);
324 /////////////////////////////////////////////////////////////////////////
// Maps each supported FilterFlavor to the filter Type that handles it;
// written by ScanAssemblyForFilters and read by CreateFilters.
326 private static Hashtable filter_types_by_flavor
= new Hashtable ();
// Maps a filter type's name (Type.ToString ()) to that filter's Version;
// written by ScanAssemblyForFilters and read by GetFilterVersion.
327 private static Hashtable filter_versions_by_name
= new Hashtable ();
// Registers every concrete Filter subclass found in 'assembly': records its
// version by type name and maps each of its supported flavors to the type.
// Returns an int that the caller logs as the number of filters loaded.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip); the counter, the 'try' keyword, whatever follows the error
// logging in the catch block, and the return statement are not visible here.
329 static private int ScanAssemblyForFilters (Assembly assembly
)
333 foreach (Type t
in assembly
.GetTypes ()) {
// Only non-abstract subclasses of Filter are considered.
334 if (t
.IsSubclassOf (typeof (Filter
)) && ! t
.IsAbstract
) {
335 Filter filter
= null;
// An instance is created so that its Version and SupportedFlavors can be read.
338 filter
= (Filter
) Activator
.CreateInstance (t
);
339 } catch (Exception ex
) {
340 Logger
.Log
.Error ("Caught exception while instantiating {0}", t
);
341 Logger
.Log
.Error (ex
);
347 filter_versions_by_name
[t
.ToString ()] = filter
.Version
;
// A flavor already present in the table is overwritten by this assignment.
349 foreach (FilterFlavor flavor
in filter
.SupportedFlavors
)
350 filter_types_by_flavor
[flavor
] = t
;
// Loads every ".dll" file found directly in 'dir' and scans it for filters.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip); the statements guarded by the first two checks (presumably
// early returns) are not visible here -- confirm.
359 static private void ScanDirectoryForAssemblies (string dir
)
// Guard against a null or empty directory name.
361 if (dir
== null || dir
== "")
// A missing directory is only logged at debug level.
364 if (! Directory
.Exists (dir
)) {
365 Logger
.Log
.Debug ("'{0}' is not a directory: No filters loaded", dir
);
369 DirectoryInfo dir_info
= new DirectoryInfo (dir
);
370 foreach (FileInfo file_info
in dir_info
.GetFiles ()) {
// The extension comparison is an exact, case-sensitive match on ".dll".
371 if (file_info
.Extension
== ".dll") {
372 Assembly a
= Assembly
.LoadFrom (file_info
.FullName
);
373 int n
= ScanAssemblyForFilters (a
);
// The message pluralises "filter" unless exactly one was loaded.
374 Logger
.Log
.Debug ("Loaded {0} filter{1} from {2}",
375 n
, n
== 1 ? "" : "s", file_info
.FullName
);
381 /////////////////////////////////////////////////////////////////////////
// Record of which filter (by type name and version) was applied to a Uri.
// The Uri is exposed for XML serialization as the string attribute "Uri".
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip); the 'uri' field, the Uri property, the get/set keywords of
// UriAsString and the end of New () are not visible here.
383 public class FilteredStatus
386 private string filter_name
;
387 private int filter_version
;
// String form of the Uri: read via UriFu.UriToSerializableString and written
// via UriFu.UriStringToUri.
395 [XmlAttribute ("Uri")]
396 public string UriAsString
{
398 return UriFu
.UriToSerializableString (uri
);
402 uri
= UriFu
.UriStringToUri (value);
// Name of the filter; New () stores the filter's type name here.
406 public string FilterName
{
407 get { return filter_name; }
408 set { filter_name = value; }
// Version of the filter; New () stores filter.Version here.
411 public int FilterVersion
{
412 get { return filter_version; }
413 set { filter_version = value; }
// Factory: builds a status from the indexable's Uri and the filter's type
// name and version.
416 public static FilteredStatus
New (Indexable indexable
, Filter filter
)
418 FilteredStatus status
= new FilteredStatus ();
420 status
.Uri
= indexable
.Uri
;
421 status
.FilterName
= filter
.GetType ().ToString ();
422 status
.FilterVersion
= filter
.Version
;