4 // Copyright (C) 2004-2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 using System
.Collections
;
36 class IndexWebContentTool
{
// Writes the command-line help text to standard output: a one-line
// banner naming the tool, followed by the table of supported options.
static void PrintUsage ()
{
	const string banner =
		"IndexWebContent.exe: Index web page content using the Beagle Search Engine.";

	// Each entry carries its own "\n" terminator, so the pieces are
	// glued together as-is and emitted with a single WriteLine call
	// (which appends one more line terminator after the table).
	string[] optionTable = {
		" --url URL\t\tURL for the web page being indexed.\n",
		" --title TITLE\t\tTitle for the web page.\n",
		" --sourcefile PATH\tFile containing content to index.\n",
		"\t\t\tIf not set, content is read from STDIN.\n",
		" --deletesourcefile\tDelete file passed to --sourcefile after index.\n",
		" --help\t\tPrint this usage message.\n"
	};

	Console.WriteLine (banner);
	Console.WriteLine (string.Concat (optionTable));
}
48 static void Main (String
[] args
)
52 string sourcefile
= null;
53 bool deletesourcefile
= false;
55 Logger
.LogToFile (PathFinder
.LogDir
, "IndexWebContent", true);
56 Logger
.Log
.Info ("Running IndexWebContent");
57 Logger
.Log
.Debug ("Debug Mode!");
59 for (int i
= 0; i
< args
.Length
; i
++) {
64 if (i
+ 1 >= args
.Length
||
65 args
[i
+ 1].StartsWith ("--")) {
80 sourcefile
= args
[++i
];
82 case "--deletesourcefile":
83 deletesourcefile
= true;
92 Console
.WriteLine ("ERROR: URI not specified!\n");
97 Uri uri
= new Uri (uriStr
, true);
98 if (uri
.Scheme
== Uri
.UriSchemeHttps
) {
99 // For security/privacy reasons, we don't index any
100 // SSL-encrypted pages.
101 Console
.WriteLine ("ERROR: Indexing secure https:// URIs is not secure!");
102 Environment
.Exit (1);
105 // We don't index file: Uris. Silently exit.
109 // We *definitely* don't index mailto: Uris. Silently exit.
110 if (uri
.Scheme
== Uri
.UriSchemeMailto
)
115 indexable
= new Indexable (uri
);
116 indexable
.Type
= "WebHistory";
117 indexable
.MimeType
= "text/html";
118 indexable
.Timestamp
= DateTime
.Now
;
121 indexable
.AddProperty (Property
.New ("dc:title", title
));
123 if (sourcefile
!= null) {
125 if (!File
.Exists (sourcefile
)) {
126 Console
.WriteLine ("ERROR: sourcefile '{0}' does not exist!",
128 Environment
.Exit (1);
131 indexable
.ContentUri
= UriFu
.PathToFileUri (sourcefile
);
132 indexable
.DeleteContent
= deletesourcefile
;
135 Stream stdin
= Console
.OpenStandardInput ();
137 Console
.WriteLine ("ERROR: No sourcefile specified, and no standard input!\n");
139 Environment
.Exit (1);
142 indexable
.SetTextReader (new StreamReader (stdin
));
145 IndexingServiceRequest req
= new IndexingServiceRequest ();
149 System
.Console
.WriteLine ("Indexing");
150 Logger
.Log
.Debug ("SendAsync");
152 Logger
.Log
.Debug ("Close");
154 Logger
.Log
.Debug ("Done");
155 } catch (Exception e
) {
156 Console
.WriteLine ("ERROR: Indexing failed:");
159 // Still clean up after ourselves, even if we couldn't
160 // index the content.
161 if (deletesourcefile
)
162 File
.Delete (sourcefile
);
164 Environment
.Exit (1);