4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 using System
.Collections
;
37 class ExtractContentTool
{
39 static bool tokenize
= false;
40 static bool show_children
= false;
41 static string mime_type
= null;
43 // FIXME: We don't display structural breaks
44 static void DisplayContent (string line
)
48 string [] parts
= line
.Split (' ');
49 for (int i
= 0; i
< parts
.Length
; ++i
) {
50 string part
= parts
[i
].Trim ();
52 Console
.WriteLine ("{0}", part
);
56 Console
.WriteLine (line
);
60 static bool first_indexable
= true;
62 static void Display (Indexable indexable
)
64 if (!first_indexable
) {
66 Console
.WriteLine ("-----------------------------------------");
69 first_indexable
= false;
71 Console
.WriteLine ("Filename: " + indexable
.Uri
);
75 if (! FilterFactory
.FilterIndexable (indexable
, out filter
)) {
76 Console
.WriteLine ("No filter for {0}", indexable
.MimeType
);
81 Console
.WriteLine ("Filter: {0}", filter
);
82 Console
.WriteLine ("MimeType: {0}", filter
.MimeType
);
85 if (filter
.ChildIndexables
!= null && filter
.ChildIndexables
.Count
> 0) {
86 Console
.WriteLine ("Child indexables:");
88 foreach (Indexable i
in filter
.ChildIndexables
)
89 Console
.WriteLine (" {0}", i
.Uri
);
94 // Make sure that the properties are sorted.
95 ArrayList prop_array
= new ArrayList (indexable
.Properties
);
101 foreach (Beagle
.Property prop
in prop_array
) {
103 Console
.WriteLine ("Properties:");
106 Console
.WriteLine (" {0} = {1}", prop
.Key
, prop
.Value
);
109 Console
.WriteLine ();
111 if (indexable
.NoContent
)
116 reader
= indexable
.GetTextReader ();
117 if (reader
!= null) {
120 while ((line
= reader
.ReadLine ()) != null) {
122 Console
.WriteLine ("Content:");
125 DisplayContent (line
);
130 Console
.WriteLine ("(no content)");
132 Console
.WriteLine ();
135 reader
= indexable
.GetHotTextReader ();
136 if (reader
!= null) {
139 while ((line
= reader
.ReadLine ()) != null) {
141 Console
.WriteLine ("HotContent:");
144 DisplayContent (line
);
149 Console
.WriteLine ("(no hot content)");
151 Console
.WriteLine ();
154 Stream stream
= indexable
.GetBinaryStream ();
158 if (show_children
&& filter
.ChildIndexables
!= null) {
159 foreach (Indexable i
in filter
.ChildIndexables
) {
165 indexable
.Cleanup ();
169 static void PrintUsage ()
171 Console
.WriteLine ("beagle-extract-content: Extracts filtered data from a file.");
172 Console
.WriteLine ("Copyright (C) 2004-2005 Novell, Inc.");
173 Console
.WriteLine ();
174 Console
.WriteLine ("Usage: beagle-extract-content [OPTIONS] file [file ...]");
175 Console
.WriteLine ();
176 Console
.WriteLine ("Options:");
177 Console
.WriteLine (" --debug\t\t\tPrint debug info to the console");
178 Console
.WriteLine (" --tokenize\t\t\tTokenize the text before printing");
179 Console
.WriteLine (" --show-children\t\tShow filtering information for items created by filters");
180 Console
.WriteLine (" --mimetype=<mime_type>\tUse filter for mime_type");
181 Console
.WriteLine (" --outfile=<filename>\t\tOutput file name");
182 Console
.WriteLine (" --help\t\t\tShow this message");
183 Console
.WriteLine ();
186 static int Main (string[] args
)
188 if (Array
.IndexOf (args
, "--debug") == -1)
191 if (Array
.IndexOf (args
, "--help") != -1) {
196 if (Array
.IndexOf (args
, "--tokenize") != -1)
199 if (Array
.IndexOf (args
, "--show-children") != -1)
200 show_children
= true;
202 StreamWriter writer
= null;
203 string outfile
= null;
204 foreach (string arg
in args
) {
207 if (arg
.StartsWith ("--mimetype=")) {
208 mime_type
= arg
.Substring (11);
210 // output file option
211 // we need this in case the output contains different encoding
212 // printing to Console might not always display properly
213 } else if (arg
.StartsWith ("--outfile=")) {
214 outfile
= arg
.Substring (10);
215 Console
.WriteLine ("Redirecting output to " + outfile
);
216 FileStream f
= new FileStream (outfile
, FileMode
.Create
);
217 writer
= new StreamWriter (f
, System
.Text
.Encoding
.UTF8
);
219 } else if (arg
.StartsWith ("--")) // option, skip it
222 Uri uri
= UriFu
.PathToFileUri (arg
);
223 Indexable indexable
= new Indexable (uri
);
224 if (mime_type
!= null)
225 indexable
.MimeType
= mime_type
;
228 if (writer
!= null) {
229 Console
.SetOut (writer
);
233 if (writer
!= null) {
237 if (outfile
!= null) {
238 StreamWriter standardOutput
= new StreamWriter(Console
.OpenStandardOutput());
239 standardOutput
.AutoFlush
= true;
240 Console
.SetOut(standardOutput
);
243 } catch (Exception e
) {
244 Console
.WriteLine ("Unable to filter {0}: {1}", uri
, e
.Message
);
251 GLib
.MainLoop main_loop
= new GLib
.MainLoop ();
253 if (Environment
.GetEnvironmentVariable ("BEAGLE_TEST_MEMORY") != null) {
255 GLib
.Timeout
.Add (1000, delegate() { main_loop.Quit (); return false; }
);