Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / IndexWebContent.cs
blob b65d763b773e24d9e5a1b12dcee291c884cf9d1f
1 //
2 // IndexWebContent.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
32 using Beagle;
33 using Beagle.Daemon;
34 using Beagle.Util;
// Command-line tool that builds a single "WebHistory" Indexable for a web
// page and submits it through an IndexingServiceRequest.  The page content
// comes either from a file (--sourcefile) or from standard input.
class IndexWebContentTool {

	// Prints the command-line help text to standard output.
	static void PrintUsage () {
		Console.WriteLine ("IndexWebContent.exe: Index web page content using the Beagle Search Engine.");
		Console.WriteLine (" --url URL\t\tURL for the web page being indexed.\n" +
				   " --title TITLE\t\tTitle for the web page.\n" +
				   " --sourcefile PATH\tFile containing content to index.\n" +
				   "\t\t\tIf not set, content is read from STDIN.\n" +
				   " --deletesourcefile\tDelete file passed to --sourcefile after index.\n" +
				   " --help\t\tPrint this usage message.\n");
	}

	// Returns the value that follows the option at args [i] and advances i
	// past it.  If the value is missing, or the next argument looks like
	// another option (starts with "--"), prints the usage text and exits
	// with status 1.
	private static string RequireValue (String[] args, ref int i)
	{
		if (i + 1 >= args.Length || args [i + 1].StartsWith ("--")) {
			PrintUsage ();
			Environment.Exit (1);
		}

		return args [++i];
	}

	// Entry point.  Parses the command line, validates the URI, builds the
	// Indexable and sends it.  Exits with status 1 on any usage or indexing
	// error; returns normally (status 0) otherwise, including for the URI
	// schemes that are deliberately skipped.
	static void Main (String[] args)
	{
		string uriStr = null;
		string title = null;
		string sourcefile = null;
		bool deletesourcefile = false;

		Logger.LogToFile (PathFinder.LogDir, "IndexWebContent", true);
		Logger.Log.Info ("Running IndexWebContent");
		Logger.Log.Debug ("Debug Mode!");

		// Unrecognized arguments are ignored, as before.
		for (int i = 0; i < args.Length; i++) {
			switch (args [i]) {
			case "--url":
				uriStr = RequireValue (args, ref i);
				break;
			case "--title":
				title = RequireValue (args, ref i);
				break;
			case "--sourcefile":
				sourcefile = RequireValue (args, ref i);
				break;
			case "--deletesourcefile":
				deletesourcefile = true;
				break;
			case "--help":
				PrintUsage ();
				return;
			}
		}

		if (uriStr == null) {
			Console.WriteLine ("ERROR: URI not specified!\n");
			PrintUsage ();
			Environment.Exit (1);
			return;
		}

		// A malformed URL is a user error, not a crash: report it and
		// exit instead of letting UriFormatException escape.
		Uri uri;
		try {
			uri = new Uri (uriStr, true);
		} catch (UriFormatException e) {
			Console.WriteLine ("ERROR: Invalid URI '{0}': {1}", uriStr, e.Message);
			Environment.Exit (1);
			return;
		}

		if (uri.Scheme == Uri.UriSchemeHttps) {
			// For security/privacy reasons, we don't index any
			// SSL-encrypted pages.
			Console.WriteLine ("ERROR: Indexing secure https:// URIs is not secure!");
			Environment.Exit (1);
			return;
		}

		// We don't index file: Uris.  Silently exit.
		if (uri.IsFile)
			return;

		// We *definitely* don't index mailto: Uris.  Silently exit.
		if (uri.Scheme == Uri.UriSchemeMailto)
			return;

		Indexable indexable = new Indexable (uri);
		indexable.Type = "WebHistory";
		indexable.MimeType = "text/html";
		indexable.Timestamp = DateTime.Now;

		if (title != null)
			indexable.AddProperty (Property.New ("dc:title", title));

		if (sourcefile != null) {
			if (!File.Exists (sourcefile)) {
				Console.WriteLine ("ERROR: sourcefile '{0}' does not exist!",
						   sourcefile);
				Environment.Exit (1);
				return;
			}

			indexable.ContentUri = UriFu.PathToFileUri (sourcefile);
			// NOTE(review): presumably the consumer of the Indexable
			// removes the file when DeleteContent is set -- confirm.
			indexable.DeleteContent = deletesourcefile;
		} else {
			Stream stdin = Console.OpenStandardInput ();
			if (stdin == null) {
				Console.WriteLine ("ERROR: No sourcefile specified, and no standard input!\n");
				PrintUsage ();
				Environment.Exit (1);
				return;
			}

			indexable.SetTextReader (new StreamReader (stdin));
		}

		IndexingServiceRequest req = new IndexingServiceRequest ();
		req.Add (indexable);

		try {
			System.Console.WriteLine ("Indexing");
			Logger.Log.Debug ("SendAsync");
			req.SendAsync ();
			Logger.Log.Debug ("Close");
			req.Close ();
			Logger.Log.Debug ("Done");
		} catch (Exception e) {
			Console.WriteLine ("ERROR: Indexing failed:");
			Console.Write (e);

			// Still clean up after ourselves, even if we couldn't
			// index the content.  sourcefile is null when the content
			// came from STDIN, in which case there is nothing to delete
			// (and File.Delete (null) would throw).
			if (deletesourcefile && sourcefile != null) {
				try {
					File.Delete (sourcefile);
				} catch (Exception deleteError) {
					// Best-effort cleanup: report the problem but keep
					// the original indexing failure as the reason we
					// exit.
					Console.WriteLine ("ERROR: Could not delete sourcefile '{0}': {1}",
							   sourcefile, deleteError.Message);
				}
			}

			Environment.Exit (1);
		}
	}
}