Don't index style nodes.
[beagle.git] / beagled / ExtractContent.cs
blobad86eacd2af85a4a34e1e76aa0d621bc876be7eb
1 //
2 // ExtractContent.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Net;
33 using Beagle;
34 using Beagle.Util;
35 using Beagle.Daemon;
// Command-line tool (beagle-extract-content) that runs the Beagle filters
// over the files named on the command line and prints what was extracted:
// the filter that was used, the indexable's properties, its text content
// and its hot content.
class ExtractContentTool {

	// Set by --tokenize: print every space-separated token on its own line.
	static bool tokenize = false;

	// Set by --show-children: also display the indexables created by filters.
	static bool show_children = false;

	// Set by --mimetype=<mime_type>: assigned as the mime type of every
	// indexable created for a file argument that follows the option.
	static string mime_type = null;

	// Prints one line of extracted text.  In tokenize mode the line is split
	// on spaces and every non-empty, trimmed piece is printed on its own
	// line; otherwise the line is printed unchanged.
	// FIXME: We don't display structural breaks
	static void DisplayContent (string line)
	{
		if (! tokenize) {
			Console.WriteLine (line);
			return;
		}

		foreach (string raw in line.Split (' ')) {
			string part = raw.Trim ();
			if (part != "")
				Console.WriteLine ("{0}", part);
		}
	}

	// False once the first indexable has been displayed; used to decide
	// whether a separator has to be printed before the next one.
	static bool first_indexable = true;

	// Dumps everything 'reader' yields through DisplayContent, preceded by
	// 'heading' (printed only once a first line has been read) and followed
	// by a blank line.  Prints 'empty_message' instead when the reader yields
	// nothing.  A null reader is ignored.  The reader is always closed, even
	// if reading or printing throws.
	static void DisplayReader (TextReader reader, string heading, string empty_message)
	{
		if (reader == null)
			return;

		bool first = true;
		try {
			string line;
			while ((line = reader.ReadLine ()) != null) {
				if (first) {
					Console.WriteLine (heading);
					first = false;
				}
				DisplayContent (line);
			}
		} finally {
			reader.Close ();
		}

		if (first)
			Console.WriteLine (empty_message);
		else
			Console.WriteLine ();
	}

	// Filters 'indexable' and prints the result to Console.Out: uri, filter,
	// mime type, child indexable uris, sorted properties, content and hot
	// content.  With --show-children each child indexable is displayed
	// recursively.  indexable.Cleanup () is called exactly once on every way
	// out of the method, including the early returns and exceptions.
	static void Display (Indexable indexable)
	{
		if (! first_indexable) {
			Console.WriteLine ();
			Console.WriteLine ("-----------------------------------------");
			Console.WriteLine ();
		}
		first_indexable = false;

		Console.WriteLine ("Filename: " + indexable.Uri);

		try {
			Filter filter;

			if (! FilterFactory.FilterIndexable (indexable, out filter)) {
				Console.WriteLine ("No filter for {0}", indexable.MimeType);
				return;
			}

			Console.WriteLine ("Filter: {0}", filter);
			Console.WriteLine ("MimeType: {0}", filter.MimeType);
			Console.WriteLine ();

			if (filter.ChildIndexables != null && filter.ChildIndexables.Count > 0) {
				Console.WriteLine ("Child indexables:");

				foreach (Indexable i in filter.ChildIndexables)
					Console.WriteLine (" {0}", i.Uri);

				Console.WriteLine ();
			}

			// Make sure that the properties are sorted.
			ArrayList prop_array = new ArrayList (indexable.Properties);
			prop_array.Sort ();

			if (prop_array.Count > 0) {
				Console.WriteLine ("Properties:");
				foreach (Beagle.Property prop in prop_array)
					Console.WriteLine (" {0} = {1}", prop.Key, prop.Value);
				Console.WriteLine ();
			}

			// Nothing more to show for this indexable.  Note that, as
			// before, its children are not displayed in this case.
			if (indexable.NoContent)
				return;

			DisplayReader (indexable.GetTextReader (), "Content:", "(no content)");
			DisplayReader (indexable.GetHotTextReader (), "HotContent:", "(no hot content)");

			// NOTE(review): the binary stream is fetched only to be closed,
			// presumably so that whatever backs it is released -- confirm
			// against Indexable.GetBinaryStream.
			Stream stream = indexable.GetBinaryStream ();
			if (stream != null)
				stream.Close ();

			if (show_children && filter.ChildIndexables != null) {
				foreach (Indexable i in filter.ChildIndexables) {
					// NOTE(review): StoreStream is called before the child
					// is filtered; presumably it makes the child's data
					// available to its filter -- confirm.
					i.StoreStream ();
					Display (i);
				}
			}
		} finally {
			indexable.Cleanup ();
		}
	}

	// Prints the command-line help text.
	static void PrintUsage ()
	{
		Console.WriteLine ("beagle-extract-content: Extracts filtered data from a file.");
		Console.WriteLine ("Copyright (C) 2004-2005 Novell, Inc.");
		Console.WriteLine ();
		Console.WriteLine ("Usage: beagle-extract-content [OPTIONS] file [file ...]");
		Console.WriteLine ();
		Console.WriteLine ("Options:");
		Console.WriteLine (" --debug\t\t\tPrint debug info to the console");
		Console.WriteLine (" --tokenize\t\t\tTokenize the text before printing");
		Console.WriteLine (" --show-children\t\tShow filtering information for items created by filters");
		Console.WriteLine (" --mimetype=<mime_type>\tUse filter for mime_type");
		Console.WriteLine (" --outfile=<filename>\t\tOutput file name");
		Console.WriteLine (" --help\t\t\tShow this message");
		Console.WriteLine ();
	}

	// Entry point.  Returns 0 on success (or after printing the help text)
	// and -1 as soon as displaying one of the files throws.
	//
	// --debug, --help, --tokenize and --show-children apply wherever they
	// appear.  --mimetype= and --outfile= are handled while walking the
	// arguments in order, so they only affect the files that follow them.
	static int Main (string[] args)
	{
		if (Array.IndexOf (args, "--debug") == -1)
			Log.Disable ();

		if (Array.IndexOf (args, "--help") != -1) {
			PrintUsage ();
			return 0;
		}

		if (Array.IndexOf (args, "--tokenize") != -1)
			tokenize = true;

		if (Array.IndexOf (args, "--show-children") != -1)
			show_children = true;

		// Remembered so that the console can be restored after every file
		// and so that errors never end up in the redirected output.
		TextWriter console_out = Console.Out;
		StreamWriter writer = null;

		try {
			foreach (string arg in args) {

				// mime-type option
				if (arg.StartsWith ("--mimetype=")) {
					mime_type = arg.Substring (11);
					continue;
				}

				// output file option
				// we need this in case the output contains different encoding
				// printing to Console might not always display properly
				if (arg.StartsWith ("--outfile=")) {
					string outfile = arg.Substring (10);
					Console.WriteLine ("Redirecting output to " + outfile);

					// A repeated --outfile replaces the previous target;
					// close the old one so it is flushed and not leaked.
					if (writer != null) {
						writer.Close ();
						writer = null;
					}

					FileStream f = new FileStream (outfile, FileMode.Create);
					writer = new StreamWriter (f, System.Text.Encoding.UTF8);
					continue;
				}

				// any other option, skip it
				if (arg.StartsWith ("--"))
					continue;

				Uri uri = UriFu.PathToFileUri (arg);
				Indexable indexable = new Indexable (uri);
				if (mime_type != null)
					indexable.MimeType = mime_type;

				try {
					if (writer != null)
						Console.SetOut (writer);

					Display (indexable);
				} catch (Exception e) {
					// Switch back first: otherwise the message would go to
					// the output file instead of the console.
					Console.SetOut (console_out);
					Console.WriteLine ("Unable to filter {0}: {1}", uri, e.Message);
					return -1;
				} finally {
					Console.SetOut (console_out);
					if (writer != null)
						writer.Flush ();
				}
			}
		} finally {
			// Runs on the error return as well, so partial output is kept
			// and the file handle is released.
			Console.SetOut (console_out);
			if (writer != null)
				writer.Close ();
		}

		// Memory-test mode: force a collection, then run the Gtk main loop
		// until the timeout (interval 1000) fires and quits it.
		if (Environment.GetEnvironmentVariable ("BEAGLE_TEST_MEMORY") != null) {
			GC.Collect ();
			GLib.Timeout.Add (1000, delegate() { Gtk.Application.Quit (); return false; });
			Gtk.Application.Run ();
		}

		return 0;
	}
}