Cleanup child indexables in beagle-extract-content. Print timestamp with timezone...
[beagle.git] / beagled / ExtractContent.cs
blob68c70a0db87083597af27c4a8140a6f97da3e864
1 //
2 // ExtractContent.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Net;
33 using Beagle;
34 using Beagle.Util;
35 using Beagle.Daemon;
37 class ExtractContentTool {
39 static bool tokenize = false;
40 static bool show_children = false;
41 static string mime_type = null;
43 // FIXME: We don't display structural breaks
// Writes one line of extracted text to the console.
//
// When the static 'tokenize' flag is set, the line is split on single
// spaces and every piece that is non-empty after trimming is printed on
// its own line.  Otherwise the line is printed unchanged.
static void DisplayContent (string line)
{
	if (! tokenize) {
		Console.WriteLine (line);
		return;
	}

	foreach (string raw_token in line.Split (' ')) {
		string token = raw_token.Trim ();
		if (token.Length > 0)
			Console.WriteLine (token);
	}
}
60 static bool first_indexable = true;
// Prints every line produced by 'reader', preceded by 'heading' once the
// first line arrives.  If the reader yields no lines, 'empty_message' is
// printed instead; otherwise a blank line terminates the section.
// A null reader is silently ignored.  The reader is always closed, even
// if reading or printing throws.
static void DisplayText (TextReader reader, string heading, string empty_message)
{
	if (reader == null)
		return;

	bool printed_heading = false;
	try {
		string line;
		while ((line = reader.ReadLine ()) != null) {
			if (! printed_heading) {
				Console.WriteLine (heading);
				printed_heading = true;
			}
			DisplayContent (line);
		}
	} finally {
		reader.Close ();
	}

	if (printed_heading)
		Console.WriteLine ();
	else
		Console.WriteLine (empty_message);
}

// Filters 'indexable' and prints what the filter produced: the filter and
// mime type, the URIs of any child indexables, the sorted properties, the
// text content and the hot text content.
//
// Successive indexables are separated by a dashed rule.  When the static
// 'show_children' flag is set, each child indexable is stored and displayed
// recursively; otherwise children are only cleaned up.
//
// Cleanup () is called on the indexable on every path out of this method
// (no filter, NoContent, normal completion, or an exception), and children
// that are not displayed are cleaned up as well.
static void Display (Indexable indexable)
{
	if (! first_indexable) {
		Console.WriteLine ();
		Console.WriteLine ("-----------------------------------------");
		Console.WriteLine ();
	}
	first_indexable = false;

	Console.WriteLine ("Filename: " + indexable.Uri);

	Filter filter;
	if (! FilterFactory.FilterIndexable (indexable, out filter)) {
		Console.WriteLine ("No filter for {0}", indexable.MimeType);
		indexable.Cleanup ();
		return;
	}

	bool has_content;
	try {
		Console.WriteLine ("Filter: {0}", filter);
		Console.WriteLine ("MimeType: {0}", filter.MimeType);
		Console.WriteLine ();

		if (filter.ChildIndexables != null && filter.ChildIndexables.Count > 0) {
			Console.WriteLine ("Child indexables:");

			foreach (Indexable child in filter.ChildIndexables)
				Console.WriteLine (" {0}", child.Uri);

			Console.WriteLine ();
		}

		// Make sure that the properties are sorted.
		ArrayList prop_array = new ArrayList (indexable.Properties);
		prop_array.Sort ();

		Console.WriteLine ("Properties:");

		if (indexable.ValidTimestamp)
			Console.WriteLine (" Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

		foreach (Beagle.Property prop in prop_array)
			Console.WriteLine (" {0} = {1}", prop.Key, prop.Value);

		Console.WriteLine ();

		has_content = ! indexable.NoContent;
		if (has_content) {
			DisplayText (indexable.GetTextReader (), "Content:", "(no content)");
			DisplayText (indexable.GetHotTextReader (), "HotContent:", "(no hot content)");

			// The binary stream is opened and closed without being read.
			Stream stream = indexable.GetBinaryStream ();
			if (stream != null)
				stream.Close ();
		}
	} finally {
		// Clean up any temporary files associated with filtering this
		// indexable.  This used to be skipped on the NoContent path and
		// was called twice on the normal path.
		indexable.Cleanup ();
	}

	if (filter.ChildIndexables == null)
		return;

	foreach (Indexable child in filter.ChildIndexables) {
		// Children are displayed only when requested and only when the
		// parent had content (a NoContent parent never displayed its
		// children); in every other case they are just cleaned up.
		if (! show_children || ! has_content) {
			child.Cleanup ();
			continue;
		}

		child.StoreStream ();
		Display (child);
	}
}
// Prints the command-line help text for beagle-extract-content to the
// console, one entry of the table below per output line.
static void PrintUsage ()
{
	string [] usage_lines = {
		"beagle-extract-content: Extracts filtered data from a file.",
		"Copyright (C) 2004-2005 Novell, Inc.",
		"",
		"Usage: beagle-extract-content [OPTIONS] file [file ...]",
		"",
		"Options:",
		" --debug\t\t\tPrint debug info to the console",
		" --tokenize\t\t\tTokenize the text before printing",
		" --show-children\t\tShow filtering information for items created by filters",
		" --mimetype=<mime_type>\tUse filter for mime_type",
		" --outfile=<filename>\t\tOutput file name",
		" --help\t\t\tShow this message",
		""
	};

	foreach (string usage_line in usage_lines)
		Console.WriteLine (usage_line);
}
// Entry point.  Parses the command line and displays the filtered content
// of every non-option argument, treating each one as a file path.
//
// Options handled here:
//   --debug            keep logging enabled (it is disabled otherwise)
//   --help             print usage and exit
//   --tokenize         set the 'tokenize' flag
//   --show-children    set the 'show_children' flag
//   --mimetype=<type>  force this mime type on the files that follow it
//   --outfile=<name>   write the output for the files that follow it to
//                      <name> (UTF-8) instead of the console
// Any other argument starting with "--" is ignored.
//
// Returns 0 on success, or -1 as soon as filtering a file throws.
static int Main (string[] args)
{
	if (Array.IndexOf (args, "--debug") == -1)
		Log.Disable ();

	if (Array.IndexOf (args, "--help") != -1) {
		PrintUsage ();
		return 0;
	}

	if (Array.IndexOf (args, "--tokenize") != -1)
		tokenize = true;

	if (Array.IndexOf (args, "--show-children") != -1)
		show_children = true;

	// Remember the real console writer so it can be restored after each
	// file, instead of wrapping stdout in a fresh StreamWriter every time.
	TextWriter console_out = Console.Out;
	StreamWriter writer = null;

	try {
		foreach (string arg in args) {

			// mime-type option
			if (arg.StartsWith ("--mimetype=")) {
				mime_type = arg.Substring (11);
				continue;
			}

			// output file option
			// we need this in case the output contains different encoding
			// printing to Console might not always display properly
			if (arg.StartsWith ("--outfile=")) {
				string outfile = arg.Substring (10);
				Console.WriteLine ("Redirecting output to " + outfile);

				// A later --outfile replaces an earlier one; close the
				// previous file rather than leaking it.
				if (writer != null) {
					writer.Close ();
					writer = null;
				}

				FileStream f = new FileStream (outfile, FileMode.Create);
				writer = new StreamWriter (f, System.Text.Encoding.UTF8);
				continue;
			}

			// option, skip it
			if (arg.StartsWith ("--"))
				continue;

			Uri uri = UriFu.PathToFileUri (arg);
			Indexable indexable = new Indexable (uri);
			if (mime_type != null)
				indexable.MimeType = mime_type;

			try {
				if (writer != null)
					Console.SetOut (writer);

				Display (indexable);
			} catch (Exception e) {
				// Restore the console first so the error is shown to the
				// user instead of being buried in the output file.
				Console.SetOut (console_out);
				Console.WriteLine ("Unable to filter {0}: {1}", uri, e.Message);
				return -1;
			} finally {
				Console.SetOut (console_out);
				if (writer != null)
					writer.Flush ();
			}
		}
	} finally {
		// Runs on the error return as well, so the output file is always
		// flushed and closed.
		if (writer != null)
			writer.Close ();
	}

	GLib.MainLoop main_loop = new GLib.MainLoop ();

	// When BEAGLE_TEST_MEMORY is set, force a collection and keep the
	// process alive in the main loop for one second before exiting.
	if (Environment.GetEnvironmentVariable ("BEAGLE_TEST_MEMORY") != null) {
		GC.Collect ();
		GLib.Timeout.Add (1000, delegate() { main_loop.Quit (); return false; });
		main_loop.Run ();
	}

	return 0;
}