4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 using System
.Collections
;
37 class ExtractContentTool
{
// Command-line option state shared by the static methods of this tool.

// Backs the "--tokenize" option ("Tokenize the text before printing" per
// PrintUsage). Defaults to false.
// NOTE(review): the statements that set and read this flag are not visible
// in this excerpt -- confirm against Main and DisplayContent.
39 static bool tokenize
= false;
// Set to true by Main when "--show-children" is among the arguments; Display
// tests it while iterating over a filter's child indexables. Defaults to false.
40 static bool show_children
= false;
// MIME type taken from a "--mimetype=<mime_type>" argument in Main. When it is
// non-null, Main assigns it to the Indexable's MimeType. Defaults to null.
41 static string mime_type
= null;
43 // FIXME: We don't display structural breaks
// Writes one line of extracted text to the console.
//
// Two output paths are visible here: one splits the line on single spaces and
// prints each trimmed piece on its own console line, the other prints the
// line unchanged.
// NOTE(review): the conditions that choose between the two paths (and any
// filtering of empty pieces) are not visible in this excerpt; presumably the
// static `tokenize` flag selects the per-piece path -- confirm.
//
// line: the text line to print.
44 static void DisplayContent (string line
)
// Split on the space character only; other whitespace is left to Trim below.
48 string [] parts
= line
.Split (' ');
49 for (int i
= 0; i
< parts
.Length
; ++i
) {
// Strip leading/trailing whitespace from each piece before printing it.
50 string part
= parts
[i
].Trim ();
52 Console
.WriteLine ("{0}", part
);
// Whole-line output path: print the line exactly as received.
56 Console
.WriteLine (line
);
// Starts out true. Display tests this flag before printing its dashed
// separator line and assigns false to it, so the separator only appears
// between successive indexables, not before the first one.
60 static bool first_indexable
= true;
// Prints a console report for a single indexable: its URI, the filter chosen
// for it, any child indexables the filter produced, its properties, its text
// content and its "hot" text content, and then calls Cleanup on it.
//
// NOTE(review): this excerpt is missing several original lines (local
// declarations such as `filter`, `reader` and `line`, closing braces, and the
// bodies of some conditionals). Comments below describe only what is visible;
// anything about the missing parts is marked as an assumption.
//
// indexable: the item to filter and display.
62 static void Display (Indexable indexable
)
// Separator between reports; skipped while first_indexable is still true.
64 if (!first_indexable
) {
66 Console
.WriteLine ("-----------------------------------------");
69 first_indexable
= false;
71 Console
.WriteLine ("Filename: " + indexable
.Uri
);
// Ask the filter factory for a filter; a false result means none is available
// and is reported using the indexable's MIME type.
// NOTE(review): what happens after the "No filter" message (presumably an
// early return) is not visible here -- confirm.
75 if (! FilterFactory
.FilterIndexable (indexable
, out filter
)) {
76 Console
.WriteLine ("No filter for {0}", indexable
.MimeType
);
81 Console
.WriteLine ("Filter: {0}", filter
);
82 Console
.WriteLine ("MimeType: {0}", filter
.MimeType
);
// List the URIs of child indexables only when the filter produced at least one.
85 if (filter
.ChildIndexables
!= null && filter
.ChildIndexables
.Count
> 0) {
86 Console
.WriteLine ("Child indexables:");
88 foreach (Indexable i
in filter
.ChildIndexables
)
89 Console
.WriteLine (" {0}", i
.Uri
);
94 // Make sure that the properties are sorted.
// Copy the properties into a separate list before printing them.
// NOTE(review): the sort call implied by the comment above is not visible in
// this excerpt -- confirm it exists.
95 ArrayList prop_array
= new ArrayList (indexable
.Properties
);
100 Console
.WriteLine ("Properties:");
// The timestamp line is printed only when the indexable reports a valid one.
102 if (indexable
.ValidTimestamp
)
103 Console
.WriteLine (" Timestamp = {0}", DateTimeUtil
.ToString (indexable
.Timestamp
));
105 foreach (Beagle
.Property prop
in prop_array
) {
106 Console
.WriteLine (" {0} = {1}", prop
.Key
, prop
.Value
);
109 Console
.WriteLine ();
// NOTE(review): the statement guarded by NoContent is not visible here;
// presumably the content sections below are skipped -- confirm.
111 if (indexable
.NoContent
)
// Regular text content: read line by line and hand each line to
// DisplayContent. A null reader or a null first ReadLine yields no lines.
116 reader
= indexable
.GetTextReader ();
117 if (reader
!= null) {
120 while ((line
= reader
.ReadLine ()) != null) {
// NOTE(review): as shown, the heading sits inside the read loop; the guard
// that presumably limits it to the first line is not visible -- confirm.
122 Console
.WriteLine ("Content:");
125 DisplayContent (line
);
// NOTE(review): the condition under which "(no content)" is printed is not
// visible in this excerpt.
130 Console
.WriteLine ("(no content)");
132 Console
.WriteLine ();
// Hot text content: same pattern as above, using the hot-text reader.
135 reader
= indexable
.GetHotTextReader ();
136 if (reader
!= null) {
139 while ((line
= reader
.ReadLine ()) != null) {
141 Console
.WriteLine ("HotContent:");
144 DisplayContent (line
);
// NOTE(review): the condition under which "(no hot content)" is printed is
// not visible in this excerpt.
149 Console
.WriteLine ("(no hot content)");
151 Console
.WriteLine ();
// NOTE(review): the binary stream is fetched here, but no use or disposal of
// `stream` is visible in this excerpt -- confirm how it is released.
154 Stream stream
= indexable
.GetBinaryStream ();
158 // Clean up any temporary files associated with filtering this indexable.
159 indexable
.Cleanup ();
// Walk the filter's child indexables; show_children is tested for each child.
// NOTE(review): the bodies of this loop and of the !show_children branch are
// not visible, so the handling of each child cannot be described from here.
161 if (filter
.ChildIndexables
!= null) {
162 foreach (Indexable i
in filter
.ChildIndexables
) {
163 if (! show_children
) {
// NOTE(review): a further Cleanup call on `indexable` (not on the loop
// variable `i`); its enclosing scope is not visible -- confirm this is
// intended and does not duplicate the Cleanup call above.
173 indexable
.Cleanup ();
// Writes the tool's help text to the console: a one-line description, the
// copyright notice, the usage synopsis and the list of supported options
// (--debug, --tokenize, --show-children, --mimetype, --outfile, --help).
// Takes no arguments and only calls Console.WriteLine.
177 static void PrintUsage ()
179 Console
.WriteLine ("beagle-extract-content: Extracts filtered data from a file.");
180 Console
.WriteLine ("Copyright (C) 2004-2005 Novell, Inc.");
181 Console
.WriteLine ();
182 Console
.WriteLine ("Usage: beagle-extract-content [OPTIONS] file [file ...]");
183 Console
.WriteLine ();
// Option descriptions; embedded tabs align the descriptions in the output.
184 Console
.WriteLine ("Options:");
185 Console
.WriteLine (" --debug\t\t\tPrint debug info to the console");
186 Console
.WriteLine (" --tokenize\t\t\tTokenize the text before printing");
187 Console
.WriteLine (" --show-children\t\tShow filtering information for items created by filters");
188 Console
.WriteLine (" --mimetype=<mime_type>\tUse filter for mime_type");
189 Console
.WriteLine (" --outfile=<filename>\t\tOutput file name");
190 Console
.WriteLine (" --help\t\t\tShow this message");
191 Console
.WriteLine ();
194 static int Main (string[] args
)
196 if (Array
.IndexOf (args
, "--debug") == -1)
199 if (Array
.IndexOf (args
, "--help") != -1) {
204 if (Array
.IndexOf (args
, "--tokenize") != -1)
207 if (Array
.IndexOf (args
, "--show-children") != -1)
208 show_children
= true;
210 StreamWriter writer
= null;
211 string outfile
= null;
212 foreach (string arg
in args
) {
215 if (arg
.StartsWith ("--mimetype=")) {
216 mime_type
= arg
.Substring (11);
218 // output file option
219 // we need this in case the output contains different encoding
220 // printing to Console might not always display properly
221 } else if (arg
.StartsWith ("--outfile=")) {
222 outfile
= arg
.Substring (10);
223 Console
.WriteLine ("Redirecting output to " + outfile
);
224 FileStream f
= new FileStream (outfile
, FileMode
.Create
);
225 writer
= new StreamWriter (f
, System
.Text
.Encoding
.UTF8
);
227 } else if (arg
.StartsWith ("--")) // option, skip it
230 Uri uri
= UriFu
.PathToFileUri (arg
);
231 Indexable indexable
= new Indexable (uri
);
232 if (mime_type
!= null)
233 indexable
.MimeType
= mime_type
;
236 if (writer
!= null) {
237 Console
.SetOut (writer
);
241 if (writer
!= null) {
245 if (outfile
!= null) {
246 StreamWriter standardOutput
= new StreamWriter(Console
.OpenStandardOutput());
247 standardOutput
.AutoFlush
= true;
248 Console
.SetOut(standardOutput
);
251 } catch (Exception e
) {
252 Console
.WriteLine ("Unable to filter {0}: {1}", uri
, e
.Message
);
259 GLib
.MainLoop main_loop
= new GLib
.MainLoop ();
261 if (Environment
.GetEnvironmentVariable ("BEAGLE_TEST_MEMORY") != null) {
263 GLib
.Timeout
.Add (1000, delegate() { main_loop.Quit (); return false; }
);