From 2f8d27a04a32a47e165e73d4bdec8680f386be42 Mon Sep 17 00:00:00 2001 From: dbera Date: Fri, 24 Nov 2006 20:48:48 +0000 Subject: [PATCH] Add FilterArchive from bugzilla with some minor modifications. Archive filter needs some work; more interesting properties of the archive file itself need to be stored (names of archive contents?) and nested archives are not working. --- BeagleClient/Property.cs | 58 ++++++ Filters/AssemblyInfo.cs | 1 + Filters/FilterArchive.cs | 449 +++++++++++++++++++++++++++++++++++++++++++++++ Filters/Makefile.am | 1 + 4 files changed, 509 insertions(+) create mode 100644 Filters/FilterArchive.cs diff --git a/BeagleClient/Property.cs b/BeagleClient/Property.cs index 7275d599..83293d7d 100644 --- a/BeagleClient/Property.cs +++ b/BeagleClient/Property.cs @@ -25,6 +25,9 @@ // using System; +using System.Collections; +using System.IO; +using System.Text; using System.Xml.Serialization; using Beagle.Util; @@ -46,6 +49,18 @@ namespace Beagle { bool is_mutable; bool is_stored; + // Commonly used property keys + public const string PrivateNamespace = "_private:"; + public const string SplitFilenamePropKey = "beagle:SplitFilename"; + public const string ExactFilenamePropKey = "beagle:ExactFilename"; + public const string TextFilenamePropKey = "beagle:Filename"; + public const string NoPunctFilenamePropKey = "beagle:NoPunctFilename"; + public const string FilenameExtensionPropKey = "beagle:FilenameExtension"; + public const string ParentDirUriPropKey = Property.PrivateNamespace + "ParentDirUri"; + public const string IsDirectoryPropKey = Property.PrivateNamespace + "IsDirectory"; + public const string IsChildPropKey = "beagle:IsChild"; + + [XmlAttribute] public PropertyType Type { get { return type; } @@ -216,5 +231,48 @@ namespace Beagle { { return String.Format ("{0}={1}", Key, Value); } + + // Standard properties for files + // Used by FileSystem backend and filters which produce file child-indexables + public static IEnumerable 
StandardFileProperties (string name, bool mutable) + { + StringBuilder sb; + sb = new StringBuilder (); + + string no_ext, ext, no_punct; + no_ext = Path.GetFileNameWithoutExtension (name); + ext = Path.GetExtension (name).ToLower (); + + sb.Append (no_ext); + for (int i = 0; i < sb.Length; ++i) + if (! Char.IsLetterOrDigit (sb [i])) + sb [i] = ' '; + no_punct = sb.ToString (); + + + Property prop; + + prop = Property.NewKeyword (ExactFilenamePropKey, name); + prop.IsMutable = mutable; + yield return prop; + + prop = Property.New (TextFilenamePropKey, no_ext); + prop.IsMutable = mutable; + yield return prop; + + prop = Property.New (NoPunctFilenamePropKey, no_punct); + prop.IsMutable = mutable; + yield return prop; + + prop = Property.NewUnsearched (FilenameExtensionPropKey, ext); + prop.IsMutable = mutable; + yield return prop; + + string str; + str = StringFu.FuzzyDivide (no_ext); + prop = Property.NewUnstored (SplitFilenamePropKey, str); + prop.IsMutable = mutable; + yield return prop; + } } } diff --git a/Filters/AssemblyInfo.cs b/Filters/AssemblyInfo.cs index 4934daed..9ccac156 100644 --- a/Filters/AssemblyInfo.cs +++ b/Filters/AssemblyInfo.cs @@ -30,6 +30,7 @@ using Beagle.Filters; // All filter types have to be listed here to be loaded. [assembly: Beagle.Daemon.FilterTypes ( + typeof(FilterArchive), typeof(FilterAbiWord), typeof(FilterBMP), typeof(FilterBoo), diff --git a/Filters/FilterArchive.cs b/Filters/FilterArchive.cs new file mode 100644 index 00000000..a0ce5003 --- /dev/null +++ b/Filters/FilterArchive.cs @@ -0,0 +1,449 @@ +// +// FilterArchive.cs +// +// Copyright (C) 2004-2006 Novell, Inc. 
+// +// +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+// + +using System; +using System.Collections; +using System.IO; + +using ICSharpCode.SharpZipLib; +using ICSharpCode.SharpZipLib.Zip; +using ICSharpCode.SharpZipLib.GZip; +using ICSharpCode.SharpZipLib.BZip2; +using ICSharpCode.SharpZipLib.Tar; + +using Beagle; +using Beagle.Daemon; +using Beagle.Util; + +namespace Beagle.Filters { + + public class FilterArchive : Beagle.Daemon.Filter, IDisposable { + + Archive archive = null; + ArchiveHandler tarHandler = null; + + public FilterArchive () + { + AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/zip")); + AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-bzip-compressed-tar")); + AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-compressed-tar")); + AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-tar")); + AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-tgz")); + //AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-gzip")); + //AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-bzip")); + } + + protected override void DoOpen (FileInfo info) + { + archive = new Archive (info.FullName, MimeType); + } + + protected override void DoPullProperties () + { + // FIXME: Fetch the archive properties. 
+ } + + protected override void DoPullSetup () + { + this.tarHandler = new ArchiveHandler (this); + this.tarHandler.OnEachEntry (archive); + + AddChildIndexables (this.tarHandler.ChildIndexables); + } + + protected override void DoClose () + { + Dispose (); + } + + public void Dispose () + { + if (this.archive != null) + archive.Close (); + } + + private class ArchiveEntry { + private string name; + private long size; + private string mimetype; + private bool is_directory; + private DateTime modified; + private string uri; + private string comment; + private string temp_file_name; + + public ArchiveEntry () + { + name = null; + size = 0; + mimetype = null; + is_directory = false; + modified = DateTimeUtil.UnixToDateTimeUtc (0); + comment = null; + temp_file_name = null; + } + + public string Name { + get { + return name; + } + + set { + name = value; + } + } + + public string MimeType { + get { + return mimetype; + } + + set { + mimetype = value; + } + } + + public System.DateTime Modified { + get { + return modified; + } + + set { + modified = value; + } + } + + public long Size { + get { + return size; + } + + set { + size = value; + } + } + + public bool IsDirectory { + get { + return is_directory; + } + + set { + is_directory = value; + } + } + + public string Uri { + get { + return uri; + } + + set { + uri = value; + } + } + + public string Comment { + get { + return comment; + } + + set { + comment = value; + } + } + + public string TempFileName { + get { + return temp_file_name; + } + + set { + temp_file_name = value; + } + } + + } + + private class Archive : Stream + { + Stream base_stream = null; + string uri = null; + string name = null; + string method = null; + + delegate ArchiveEntry GetNextEntryType (); + GetNextEntryType getNextEntry; + bool first = true; + + public Archive (string filename, string mimeType) { + this.uri = UriFu.PathToFileUriString (filename); + name = filename; + base_stream = new FileStream (filename, + FileMode.Open, + 
FileAccess.Read); + Init (base_stream, mimeType); + + } + + private void Init (Stream stream, string mimeType) { + + switch (mimeType) { + case "application/zip": + base_stream = new ZipInputStream (base_stream); + getNextEntry = new GetNextEntryType (GetNextEntryZip); + method = "zip:"; + break; + case "application/x-bzip-compressed-tar": + base_stream = new BZip2InputStream (base_stream); + base_stream = new TarInputStream (base_stream); + getNextEntry = new GetNextEntryType (GetNextEntryTar); + method = "bzip2tar:"; + break; + case "application/x-compressed-tar": + case "application/x-tgz": + base_stream = new GZipInputStream (base_stream); + base_stream = new TarInputStream (base_stream); + getNextEntry = new GetNextEntryType (GetNextEntryTar); + method = "gziptar:"; + break; + case "application/x-tar": + base_stream = new TarInputStream (base_stream); + getNextEntry = new GetNextEntryType (GetNextEntryTar); + method = "tar:"; + break; + case "application/x-gzip": + base_stream = new GZipInputStream (base_stream); + getNextEntry = new GetNextEntryType (GetNextEntrySingle); + method = "gzip:"; + break; + case "application/x-bzip": + base_stream = new BZip2InputStream (base_stream); + getNextEntry = new GetNextEntryType (GetNextEntrySingle); + method = "bzip:"; + break; + default: + throw new ArgumentException ("Invalid or unsupported mime type."); + } + } + + /* + public string GetNextEntrySingleAsUri () { + if (first) { + first = false; + return uri + method; + } else + return null; + } + */ + + /* Returns stream of the next entry */ + public ArchiveEntry GetNextEntry () + { + return getNextEntry (); + } + + private String GetEntryFile (string entry_name) + { + string temp_file = null; + //temp_file = Path.Combine (Path.GetTempPath (), entry_name); + temp_file = Path.GetTempFileName (); + File.Delete (temp_file); + temp_file = temp_file + Path.GetExtension (entry_name); + Console.WriteLine ("GetEntryFile: temp_file = {0}", temp_file); + FileStream filestream = 
File.OpenWrite (temp_file); + + BufferedStream buffered_stream = new BufferedStream (filestream); + BinaryWriter writer = new BinaryWriter (buffered_stream); + + const int BUFFER_SIZE = 8192; + byte[] data = new byte [BUFFER_SIZE]; + + int read; + do { + read = base_stream.Read (data, 0, BUFFER_SIZE); + if (read > 0) + writer.Write (data, 0, read); + } while (read > 0); + + writer.Close (); + + File.SetLastWriteTime (temp_file, File.GetLastWriteTime (this.name)); + return temp_file; + } + + public ArchiveEntry GetNextEntryZip () { + ArchiveEntry a_entry = null; + + ZipInputStream inputStream = base_stream as ZipInputStream; + ZipEntry entry = inputStream.GetNextEntry(); + if (entry != null && !entry.IsDirectory) { + a_entry = new ArchiveEntry (); + a_entry.Uri = /* DB uri + method + */entry.Name; + a_entry.Size = entry.Size; + a_entry.Modified = entry.DateTime; + a_entry.IsDirectory = entry.IsDirectory; + a_entry.Name = entry.Name; + a_entry.TempFileName = GetEntryFile (entry.Name); + a_entry.MimeType = Beagle.Util.XdgMime.GetMimeType (a_entry.TempFileName); + a_entry.Comment = entry.Comment; + } + return a_entry; + } + + public ArchiveEntry GetNextEntrySingle () { + if (first) { + first = false; + return null; + } else + return null; + } + + public ArchiveEntry GetNextEntryTar () { + ArchiveEntry a_entry = null; + TarInputStream inputStream = base_stream as TarInputStream; + TarEntry entry = inputStream.GetNextEntry(); + if (entry != null && !entry.IsDirectory) { + a_entry = new ArchiveEntry (); + a_entry.Uri = /* DB uri + method + */entry.Name; + a_entry.Size = entry.Size; + a_entry.Modified = entry.ModTime; + a_entry.IsDirectory = entry.IsDirectory; + a_entry.Name = entry.Name; + a_entry.TempFileName = GetEntryFile (entry.Name); + a_entry.MimeType = Beagle.Util.XdgMime.GetMimeType (a_entry.TempFileName); + } + return a_entry; + } + + public override int Read (byte[] buffer, int offset, int length) { + return base_stream.Read (buffer, offset, length); + } + + 
public override IAsyncResult BeginRead (byte[] buffer, int offset, int length, + AsyncCallback cback, object state) + { + return base_stream.BeginRead (buffer, offset, length, cback, state); + } + + public override int EndRead(IAsyncResult async_result) { + return base_stream.EndRead (async_result); + } + + public override void Write (byte[] buffer, int offset, int length) { + throw new NotSupportedException (); + } + public override void Flush () { + throw new NotSupportedException (); + } + public override long Seek (long offset, SeekOrigin origin) { + throw new NotSupportedException (); + } + public override void SetLength (long value) { + throw new System.NotSupportedException(); + } + public override bool CanRead { + get { + return base_stream.CanRead; + } + } + public override bool CanSeek { + get { + return false; + } + } + public override bool CanWrite { + get { + return false; + } + } + public override long Length { + get { + throw new System.NotSupportedException(); + } + } + public override long Position { + get { + throw new System.NotSupportedException(); + } + set { + throw new System.NotSupportedException(); + } + } + } + + + private class ArchiveHandler { + private Beagle.Daemon.Filter filter; + private ArrayList child_indexables = new ArrayList (); + private int count = 0; // entries handled so far + + public ArchiveHandler (Beagle.Daemon.Filter filter) + { + this.filter = filter; + } + + public void OnEachEntry (Archive archive) + { + ArchiveEntry a_entry = null; + + //Log.Debug ("{0}.GetNextentry", archive.filename); + while ((a_entry = archive.GetNextEntry ()) != null) { + // FIXME: For nested archives, create uid:foo#bar + // instead of uid:foo#xxx#bar (avoid duplicates ?) 
+ Indexable child = new Indexable (new Uri (filter.Uri.ToString () + "#" + a_entry.Uri, true)); + + child.CacheContent = false; + child.MimeType = a_entry.MimeType; + child.ContentUri = UriFu.PathToFileUri (a_entry.TempFileName); + child.DeleteContent = true; + + //Log.Debug ("Creating child with uri={0}, content-uri={1}", + // a_entry.Uri, + // child.ContentUri); + //Console.WriteLine ("Name: {0}, MimeType: {1}", sub_uri, child.MimeType); + + if (a_entry.Comment != null) + child.AddProperty (Property.New ("fixme:comment", a_entry.Comment)); + child.AddProperty (Property.New ("fixme:name", a_entry.Name)); + child.AddProperty (Property.NewKeyword ("fixme:size", a_entry.Size)); + child.AddProperty (Property.New ("fixme:relativeuri", a_entry.Uri)); + foreach (Property prop in Property.StandardFileProperties (Path.GetFileName (a_entry.Name), false)) + child.AddProperty (prop); + + //child.SetBinaryStream (File.OpenRead (a_entry.Name)); + + this.child_indexables.Add (child); + this.count++; + } + } + + public ICollection ChildIndexables { + get { return this.child_indexables; } + } + } + } +} diff --git a/Filters/Makefile.am b/Filters/Makefile.am index 0fe524a2..c270c2e2 100644 --- a/Filters/Makefile.am +++ b/Filters/Makefile.am @@ -24,6 +24,7 @@ CSFILES = \ $(srcdir)/AssemblyInfo.cs \ $(ENTAGGED_CSFILES) \ $(HAP_CSFILES) \ + $(srcdir)/FilterArchive.cs \ $(srcdir)/FilterHtml.cs \ $(srcdir)/FilterImage.cs \ $(srcdir)/FilterJpeg.cs \ -- 2.11.4.GIT