4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 using System
.Collections
;
37 class ExtractContentTool
{
// Command-line switches; PrintUsage lists the matching options.
// "Tokenize the text before printing" (wording of the --tokenize help text).
// NOTE(review): the statement that turns this on is not visible in this
// excerpt; presumably Main sets it when "--tokenize" is passed -- confirm.
39 static bool tokenize
= false;
// Main sets this to true when "--show-children" is among the arguments;
// Display tests it before walking a filter's child indexables.
40 static bool show_children
= false;
// Main fills this with the text following "--mimetype=" and, when it is not
// null, assigns it to the MimeType of the Indexable it creates.
41 static string mime_type
= null;
43 // FIXME: We don't display structural breaks
// Writes one line of extracted text to the console.
// Two output forms are visible below: the line split on single spaces with
// each trimmed piece written on its own console line, and the line written
// unchanged.
// NOTE(review): the statements that choose between the two forms are not
// visible in this excerpt; presumably the static 'tokenize' flag selects the
// split form -- confirm against the full file.
44 static void DisplayContent (string line
)
// Split on the space character only; other whitespace stays inside a piece
// until Trim () removes it from the ends.
48 string [] parts
= line
.Split (' ');
49 for (int i
= 0; i
< parts
.Length
; ++i
) {
50 string part
= parts
[i
].Trim ();
// NOTE(review): consecutive spaces yield empty pieces; a guard that skips
// them may sit on the line missing from this excerpt -- confirm.
52 Console
.WriteLine ("{0}", part
);
// Raw form: the line is written exactly as received.
56 Console
.WriteLine (line
);
// True until Display has been entered once. Display prints its dashed
// separator line only when this is false, then clears it.
60 static bool first_indexable
= true;
// Prints a console report for one indexable: its URI, the filter selected
// for it and that filter's MimeType, the URIs of any child indexables, the
// timestamp and properties, then the text content and the hot text content.
// Ends by calling indexable.Cleanup ().
// NOTE(review): several declarations and guarded statements of this method
// are not visible in this excerpt; the notes below mark each such place.
62 static void Display (Indexable indexable
)
// Separate this report from the previous one; skipped on the first call.
64 if (!first_indexable
) {
66 Console
.WriteLine ("-----------------------------------------");
69 first_indexable
= false;
71 Console
.WriteLine ("Filename: " + indexable
.Uri
);
// Ask FilterFactory to filter the indexable; the chosen filter comes back
// through the out parameter. A false result is reported as "No filter".
// NOTE(review): the declaration of 'filter' and the remainder of this
// failure branch are not visible here -- presumably it exits early; confirm.
75 if (! FilterFactory
.FilterIndexable (indexable
, out filter
)) {
76 Console
.WriteLine ("No filter for {0}", indexable
.MimeType
);
81 Console
.WriteLine ("Filter: {0}", filter
);
82 Console
.WriteLine ("MimeType: {0}", filter
.MimeType
);
// List the URI of every child indexable the filter reported, if any.
85 if (filter
.ChildIndexables
!= null && filter
.ChildIndexables
.Count
> 0) {
86 Console
.WriteLine ("Child indexables:");
88 foreach (Indexable i
in filter
.ChildIndexables
)
89 Console
.WriteLine (" {0}", i
.Uri
);
94 // Make sure that the properties are sorted.
// NOTE(review): only the copy into an ArrayList is visible in this excerpt;
// the sort call itself is not shown -- confirm it exists in the full file.
95 ArrayList prop_array
= new ArrayList (indexable
.Properties
);
100 Console
.WriteLine ("Properties:");
101 Console
.WriteLine (" Timestamp = {0}", indexable
.Timestamp
);
102 foreach (Beagle
.Property prop
in prop_array
) {
103 Console
.WriteLine (" {0} = {1}", prop
.Key
, prop
.Value
);
105 Console
.WriteLine ();
// NOTE(review): the statement guarded by this test is not visible;
// presumably the method stops here for indexables without content -- confirm.
107 if (indexable
.NoContent
)
// Text content: read the reader line by line and hand each line to
// DisplayContent.
// NOTE(review): the declarations of 'reader' and 'line', and the conditions
// deciding when "Content:" and "(no content)" are printed, are not visible.
112 reader
= indexable
.GetTextReader ();
113 if (reader
!= null) {
116 while ((line
= reader
.ReadLine ()) != null) {
118 Console
.WriteLine ("Content:");
121 DisplayContent (line
);
126 Console
.WriteLine ("(no content)");
128 Console
.WriteLine ();
// Hot text content: same reading pattern as above, using the reader returned
// by GetHotTextReader ().
// NOTE(review): the conditions around "HotContent:" and "(no hot content)"
// are likewise not visible in this excerpt.
131 reader
= indexable
.GetHotTextReader ();
132 if (reader
!= null) {
135 while ((line
= reader
.ReadLine ()) != null) {
137 Console
.WriteLine ("HotContent:");
140 DisplayContent (line
);
145 Console
.WriteLine ("(no hot content)");
147 Console
.WriteLine ();
// NOTE(review): what is done with 'stream' is not visible in this excerpt.
150 Stream stream
= indexable
.GetBinaryStream ();
// Only when --show-children was requested: walk the filter's child
// indexables.
// NOTE(review): the loop body is not visible; presumably each child is
// displayed in turn -- confirm.
154 if (show_children
&& filter
.ChildIndexables
!= null) {
155 foreach (Indexable i
in filter
.ChildIndexables
) {
// NOTE(review): Cleanup () is defined elsewhere; its exact effect cannot be
// determined from this file.
161 indexable
.Cleanup ();
// Writes the command-line help text for beagle-extract-content to the
// console: a banner, the usage synopsis and one line per supported option.
static void PrintUsage ()
{
	// One entry per console line, in output order. An empty entry produces
	// a blank line.
	string [] usage_lines = new string [] {
		"beagle-extract-content: Extracts filtered data from a file.",
		"Copyright (C) 2004-2005 Novell, Inc.",
		"",
		"Usage: beagle-extract-content [OPTIONS] file [file ...]",
		"",
		"Options:",
		" --debug\t\t\tPrint debug info to the console",
		" --tokenize\t\t\tTokenize the text before printing",
		" --show-children\t\tShow filtering information for items created by filters",
		" --mimetype=<mime_type>\tUse filter for mime_type",
		" --outfile=<filename>\t\tOutput file name",
		" --help\t\t\tShow this message",
		""
	};

	// The single-string overload writes the text verbatim, so no format
	// processing is applied to the entries.
	foreach (string usage_line in usage_lines)
		Console.WriteLine (usage_line);
}
182 static int Main (string[] args
)
184 if (Array
.IndexOf (args
, "--debug") == -1)
187 if (Array
.IndexOf (args
, "--help") != -1) {
192 if (Array
.IndexOf (args
, "--tokenize") != -1)
195 if (Array
.IndexOf (args
, "--show-children") != -1)
196 show_children
= true;
198 StreamWriter writer
= null;
199 string outfile
= null;
200 foreach (string arg
in args
) {
203 if (arg
.StartsWith ("--mimetype=")) {
204 mime_type
= arg
.Substring (11);
206 // output file option
207 // we need this in case the output contains different encoding
208 // printing to Console might not always display properly
209 } else if (arg
.StartsWith ("--outfile=")) {
210 outfile
= arg
.Substring (10);
211 Console
.WriteLine ("Redirecting output to " + outfile
);
212 FileStream f
= new FileStream (outfile
, FileMode
.Create
);
213 writer
= new StreamWriter (f
, System
.Text
.Encoding
.UTF8
);
215 } else if (arg
.StartsWith ("--")) // option, skip it
218 Uri uri
= UriFu
.PathToFileUri (arg
);
219 Indexable indexable
= new Indexable (uri
);
220 if (mime_type
!= null)
221 indexable
.MimeType
= mime_type
;
224 if (writer
!= null) {
225 Console
.SetOut (writer
);
229 if (writer
!= null) {
233 if (outfile
!= null) {
234 StreamWriter standardOutput
= new StreamWriter(Console
.OpenStandardOutput());
235 standardOutput
.AutoFlush
= true;
236 Console
.SetOut(standardOutput
);
239 } catch (Exception e
) {
240 Console
.WriteLine ("Unable to filter {0}: {1}", uri
, e
.Message
);
247 GLib
.MainLoop main_loop
= new GLib
.MainLoop ();
249 if (Environment
.GetEnvironmentVariable ("BEAGLE_TEST_MEMORY") != null) {
251 GLib
.Timeout
.Add (1000, delegate() { main_loop.Quit (); return false; }
);