Fixed #374055: Only the first "tag" is detected in digikam.
[beagle.git] / beagled / ExtractContent.cs
blob8640a8bfc9db3213ed0acacb97c84a4c117ced43
1 //
2 // ExtractContent.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Net;
33 using Beagle;
34 using Beagle.Util;
35 using Beagle.Daemon;
// Command-line tool (beagle-extract-content): runs the Beagle filters over
// each file named on the command line and prints the filter, mime type,
// properties and extracted text for it.
class ExtractContentTool {

	// Set by --tokenize: print every space-separated token on its own line.
	static bool tokenize = false;

	// Set by --show-children: also display the indexables created by filters.
	static bool show_children = false;

	// Set by --mimetype=<mime_type>: applied to the files that follow it.
	static string mime_type = null;

	// True until the first indexable has been printed; every later one is
	// preceded by a separator line.
	static bool first_indexable = true;

	// Prints one line of extracted text, either verbatim or (when tokenizing)
	// one non-empty token per output line.
	//
	// FIXME: We don't display structural breaks
	static void DisplayContent (string line)
	{
		if (! tokenize) {
			Console.WriteLine (line);
			return;
		}

		foreach (string raw in line.Split (' ')) {
			string part = raw.Trim ();
			if (part.Length > 0)
				Console.WriteLine ("{0}", part);
		}
	}

	// Dumps everything readable from 'reader' under 'header'.  The header is
	// only printed once the first line arrives; if nothing could be read,
	// 'empty_message' is printed instead.  A null reader prints nothing.
	// The reader is closed even when reading or printing throws.
	static void DisplayReader (TextReader reader, string header, string empty_message)
	{
		if (reader == null)
			return;

		bool first = true;

		try {
			string line;
			while ((line = reader.ReadLine ()) != null) {
				if (first) {
					Console.WriteLine (header);
					first = false;
				}
				DisplayContent (line);
			}
		} finally {
			reader.Close ();
		}

		if (first)
			Console.WriteLine (empty_message);
		else
			Console.WriteLine ();
	}

	// Filters 'indexable' and prints its filter, mime type, child indexables,
	// sorted properties, content and hot content.  With --show-children the
	// child indexables are displayed recursively afterwards.
	static void Display (Indexable indexable)
	{
		if (! first_indexable) {
			Console.WriteLine ();
			Console.WriteLine ("-----------------------------------------");
			Console.WriteLine ();
		}
		first_indexable = false;

		Console.WriteLine ("Filename: " + indexable.Uri);

		Filter filter;

		if (! FilterFactory.FilterIndexable (indexable, out filter)) {
			Console.WriteLine ("No filter for {0}", indexable.MimeType);
			indexable.Cleanup ();
			return;
		}

		// From here on Cleanup () has to run on every exit path, including
		// the NoContent early return and exceptions thrown while reading.
		try {
			Console.WriteLine ("Filter: {0}", filter);
			Console.WriteLine ("MimeType: {0}", filter.MimeType);
			Console.WriteLine ();

			if (filter.ChildIndexables != null && filter.ChildIndexables.Count > 0) {
				Console.WriteLine ("Child indexables:");

				foreach (Indexable i in filter.ChildIndexables)
					Console.WriteLine (" {0}", i.Uri);

				Console.WriteLine ();
			}

			// Make sure that the properties are sorted.
			ArrayList prop_array = new ArrayList (indexable.Properties);
			prop_array.Sort ();

			Console.WriteLine ("Properties:");
			Console.WriteLine (" Timestamp = {0}", indexable.Timestamp);
			foreach (Beagle.Property prop in prop_array)
				Console.WriteLine (" {0} = {1}", prop.Key, prop.Value);
			Console.WriteLine ();

			if (indexable.NoContent)
				return;

			DisplayReader (indexable.GetTextReader (), "Content:", "(no content)");
			DisplayReader (indexable.GetHotTextReader (), "HotContent:", "(no hot content)");

			// The binary stream is not printed; it is only fetched so that
			// it can be closed.
			Stream stream = indexable.GetBinaryStream ();
			if (stream != null)
				stream.Close ();

			if (show_children && filter.ChildIndexables != null) {
				foreach (Indexable i in filter.ChildIndexables) {
					i.StoreStream ();
					Display (i);
				}
			}
		} finally {
			indexable.Cleanup ();
		}
	}

	// Prints the command-line help text.
	static void PrintUsage ()
	{
		Console.WriteLine ("beagle-extract-content: Extracts filtered data from a file.");
		Console.WriteLine ("Copyright (C) 2004-2005 Novell, Inc.");
		Console.WriteLine ();
		Console.WriteLine ("Usage: beagle-extract-content [OPTIONS] file [file ...]");
		Console.WriteLine ();
		Console.WriteLine ("Options:");
		Console.WriteLine (" --debug\t\t\tPrint debug info to the console");
		Console.WriteLine (" --tokenize\t\t\tTokenize the text before printing");
		Console.WriteLine (" --show-children\t\tShow filtering information for items created by filters");
		Console.WriteLine (" --mimetype=<mime_type>\tUse filter for mime_type");
		Console.WriteLine (" --outfile=<filename>\t\tOutput file name");
		Console.WriteLine (" --help\t\t\tShow this message");
		Console.WriteLine ();
	}

	// Entry point.  Arguments are handled in order, so --mimetype= and
	// --outfile= only affect the files that come after them.
	// Returns 0 on success and -1 as soon as one file fails to filter.
	static int Main (string[] args)
	{
		const string mimetype_option = "--mimetype=";
		const string outfile_option = "--outfile=";

		if (Array.IndexOf (args, "--debug") == -1)
			Log.Disable ();

		if (Array.IndexOf (args, "--help") != -1) {
			PrintUsage ();
			return 0;
		}

		if (Array.IndexOf (args, "--tokenize") != -1)
			tokenize = true;

		if (Array.IndexOf (args, "--show-children") != -1)
			show_children = true;

		// Remember the real console writer so that it can be put back after
		// every file and before any error message is printed.
		TextWriter console_out = Console.Out;
		StreamWriter writer = null;

		try {
			foreach (string arg in args) {

				// mime-type option
				if (arg.StartsWith (mimetype_option)) {
					mime_type = arg.Substring (mimetype_option.Length);
					continue;
				}

				// output file option
				// we need this in case the output contains different encoding
				// printing to Console might not always display properly
				if (arg.StartsWith (outfile_option)) {
					string outfile = arg.Substring (outfile_option.Length);
					Console.WriteLine ("Redirecting output to " + outfile);

					// A repeated --outfile= replaces the previous target;
					// close the old one instead of leaking it.
					if (writer != null) {
						writer.Close ();
						writer = null;
					}

					FileStream f = new FileStream (outfile, FileMode.Create);
					writer = new StreamWriter (f, System.Text.Encoding.UTF8);
					continue;
				}

				// option, skip it
				if (arg.StartsWith ("--"))
					continue;

				Uri uri = UriFu.PathToFileUri (arg);
				Indexable indexable = new Indexable (uri);
				if (mime_type != null)
					indexable.MimeType = mime_type;

				try {
					if (writer != null)
						Console.SetOut (writer);

					Display (indexable);

					if (writer != null)
						writer.Flush ();
				} catch (Exception e) {
					// Go back to the console first, otherwise the message
					// would be written into the redirected output file.
					Console.SetOut (console_out);
					Console.WriteLine ("Unable to filter {0}: {1}", uri, e.Message);
					return -1;
				} finally {
					Console.SetOut (console_out);
				}
			}
		} finally {
			// Runs on the error return as well, so whatever was written to
			// the output file before a failure is flushed by Close ().
			if (writer != null)
				writer.Close ();
		}

		if (Environment.GetEnvironmentVariable ("BEAGLE_TEST_MEMORY") != null) {
			GLib.MainLoop main_loop = new GLib.MainLoop ();
			GC.Collect ();
			GLib.Timeout.Add (1000, delegate() { main_loop.Quit (); return false; });
			main_loop.Run ();
		}

		return 0;
	}
}