2005-05-14 Gabor Kelemen <kelemeng@gnome.hu>
[beagle.git] / beagled / IndexWebContent.cs
blobfb67339fa0ae196cef3747eb98c6c90bc5928fce
1 //
2 // IndexWebContent.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
32 using Beagle;
33 using Beagle.Daemon;
34 using BU = Beagle.Util;
// Command-line tool that builds an indexable for a single web page and hands
// it to WebHistoryIndexer.  The page content comes either from a file
// (--sourcefile) or from standard input.
class IndexWebContentTool {

	// Prints the command-line usage summary to standard output.
	static void PrintUsage () {
		Console.WriteLine ("IndexWebContent.exe: Index web page content using the Beagle Search Engine.");
		Console.WriteLine (" --url URL\t\tURL for the web page being indexed.\n" +
				   " --title TITLE\t\tTitle for the web page.\n" +
				   " --sourcefile PATH\tFile containing content to index.\n" +
				   "\t\t\tIf not set, content is read from STDIN.\n" +
				   " --deletesourcefile\tDelete file passed to --sourcefile after index.\n" +
				   " --help\t\tPrint this usage message.\n");
	}

	// Returns the value that follows the option at args [i] and advances i
	// past it.  If there is no following argument, or the following argument
	// itself starts with "--", prints the usage text and exits with status 1.
	static string TakeValue (String[] args, ref int i)
	{
		if (i + 1 >= args.Length || args [i + 1].StartsWith ("--")) {
			PrintUsage ();
			Environment.Exit (1);
		}

		return args [++i];
	}

	// Entry point.  Parses the command line, refuses URIs that must not be
	// indexed (https, file, mailto), builds the indexable and passes it to
	// WebHistoryIndexer.Index.  Every error path exits with status 1.
	static void Main (String[] args)
	{
		string uriStr = null;
		string title = null;
		string sourcefile = null;
		bool deletesourcefile = false;

		// Arguments that are not recognized are ignored.
		for (int i = 0; i < args.Length; i++) {
			switch (args [i]) {
			case "--url":
				uriStr = TakeValue (args, ref i);
				break;
			case "--title":
				title = TakeValue (args, ref i);
				break;
			case "--sourcefile":
				sourcefile = TakeValue (args, ref i);
				break;
			case "--deletesourcefile":
				deletesourcefile = true;
				break;
			case "--help":
				PrintUsage ();
				return;
			}
		}

		if (uriStr == null) {
			Console.WriteLine ("ERROR: URI not specified!\n");
			PrintUsage ();
			Environment.Exit (1);
		}

		// A malformed --url makes the Uri constructor throw; report that as
		// an ordinary usage error instead of dying with an unhandled exception.
		Uri uri = null;
		try {
			uri = new Uri (uriStr, true);
		} catch (UriFormatException e) {
			Console.WriteLine ("ERROR: Invalid URI '{0}': {1}", uriStr, e.Message);
			Environment.Exit (1);
		}

		if (uri.Scheme == Uri.UriSchemeHttps) {
			// For security/privacy reasons, we don't index any
			// SSL-encrypted pages.
			Console.WriteLine ("ERROR: Indexing secure https:// URIs is not secure!");
			Environment.Exit (1);
		}

		// We don't index file: Uris.  Silently exit.
		if (uri.IsFile)
			return;

		// We *definitely* don't index mailto: Uris.  Silently exit.
		if (uri.Scheme == Uri.UriSchemeMailto)
			return;

		FilteredIndexable indexable;

		indexable = new FilteredIndexable (uri);
		indexable.Type = "WebHistory";
		indexable.MimeType = "text/html";
		indexable.Timestamp = DateTime.Now;

		if (title != null)
			indexable.AddProperty (Property.New ("dc:title", title));

		if (sourcefile != null) {

			if (!File.Exists (sourcefile)) {
				Console.WriteLine ("ERROR: sourcefile '{0}' does not exist!",
						   sourcefile);
				Environment.Exit (1);
			}

			indexable.ContentUri = BU.UriFu.PathToFileUri (sourcefile);
			indexable.DeleteContent = deletesourcefile;

		} else {
			Stream stdin = Console.OpenStandardInput ();
			if (stdin == null) {
				Console.WriteLine ("ERROR: No sourcefile specified, and no standard input!\n");
				PrintUsage ();
				Environment.Exit (1);
			}

			indexable.SetTextReader (new StreamReader (stdin));
		}

		try {
			System.Console.WriteLine ("Indexing");
			WebHistoryIndexer.Index (indexable);
		} catch (Exception e) {
			Console.WriteLine ("ERROR: Indexing failed:");
			Console.Write (e);

			// Still clean up after ourselves, even if we couldn't
			// index the content.  --deletesourcefile may have been given
			// without --sourcefile (content read from STDIN), in which case
			// there is nothing to delete and File.Delete (null) would throw.
			if (deletesourcefile && sourcefile != null) {
				try {
					File.Delete (sourcefile);
				} catch (Exception deleteError) {
					// A failed cleanup must not hide the indexing
					// failure or change the exit status.
					Console.WriteLine ("ERROR: Could not delete sourcefile '{0}': {1}",
							   sourcefile, deleteError.Message);
				}
			}

			Environment.Exit (1);
		}
	}
}