4 // Copyright (C) 2004-2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 using System
.Collections
;
34 using BU
= Beagle
.Util
;
36 class IndexWebContentTool
{
// Writes the command-line help for IndexWebContent.exe to standard output.
// Two writes are issued: a one-line banner describing the tool, followed by
// the list of supported options (--url, --title, --sourcefile,
// --deletesourcefile, --help).
static void PrintUsage () {
	string banner = "IndexWebContent.exe: Index web page content using the Beagle Search Engine.";

	// The option list already ends in "\n"; together with WriteLine's own
	// line terminator this leaves a blank line after the usage text.
	string options =
		" --url URL\t\tURL for the web page being indexed.\n" +
		" --title TITLE\t\tTitle for the web page.\n" +
		" --sourcefile PATH\tFile containing content to index.\n" +
		"\t\t\tIf not set, content is read from STDIN.\n" +
		" --deletesourcefile\tDelete file passed to --sourcefile after index.\n" +
		" --help\t\tPrint this usage message.\n";

	Console.WriteLine (banner);
	Console.WriteLine (options);
}
48 static void Main (String
[] args
)
52 string sourcefile
= null;
53 bool deletesourcefile
= false;
55 for (int i
= 0; i
< args
.Length
; i
++) {
60 if (i
+ 1 >= args
.Length
||
61 args
[i
+ 1].StartsWith ("--")) {
76 sourcefile
= args
[++i
];
78 case "--deletesourcefile":
79 deletesourcefile
= true;
88 Console
.WriteLine ("ERROR: URI not specified!\n");
93 Uri uri
= new Uri (uriStr
, true);
94 if (uri
.Scheme
== Uri
.UriSchemeHttps
) {
95 // For security/privacy reasons, we don't index any
96 // SSL-encrypted pages.
97 Console
.WriteLine ("ERROR: Indexing secure https:// URIs is not secure!");
101 // We don't index file: Uris. Silently exit.
105 // We *definitely* don't index mailto: Uris. Silently exit.
106 if (uri
.Scheme
== Uri
.UriSchemeMailto
)
109 FilteredIndexable indexable
;
111 indexable
= new FilteredIndexable (uri
);
112 indexable
.Type
= "WebHistory";
113 indexable
.MimeType
= "text/html";
114 indexable
.Timestamp
= DateTime
.Now
;
117 indexable
.AddProperty (Property
.New ("dc:title", title
));
119 if (sourcefile
!= null) {
121 if (!File
.Exists (sourcefile
)) {
122 Console
.WriteLine ("ERROR: sourcefile '{0}' does not exist!",
124 Environment
.Exit (1);
127 indexable
.ContentUri
= BU
.UriFu
.PathToFileUri (sourcefile
);
128 indexable
.DeleteContent
= deletesourcefile
;
131 Stream stdin
= Console
.OpenStandardInput ();
133 Console
.WriteLine ("ERROR: No sourcefile specified, and no standard input!\n");
135 Environment
.Exit (1);
138 indexable
.SetTextReader (new StreamReader (stdin
));
142 System
.Console
.WriteLine ("Indexing");
143 WebHistoryIndexer
.Index (indexable
);
144 } catch (Exception e
) {
145 Console
.WriteLine ("ERROR: Indexing failed:");
148 // Still clean up after ourselves, even if we couldn't
149 // index the content.
150 if (deletesourcefile
)
151 File
.Delete (sourcefile
);
153 Environment
.Exit (1);