4 // Copyright (C) 2004-2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 using System
.Collections
;
36 class IndexWebContentTool
{
38 static void PrintUsage () {
40 "beagle-index-url: Index web page content using the Beagle Search Engine.\n" +
41 "Web page: http://www.gnome.org/projects/beagle\n" +
42 "Copyright (C) 2004-2005 Novell, Inc.\n\n";
44 "Usage: beagle-index-url <OPTIONS>\n\n" +
46 " --url URL\t\tURL for the web page being indexed.\n" +
47 " --title TITLE\t\tTitle for the web page.\n" +
48 " --sourcefile PATH\tFile containing content to index.\n" +
49 "\t\t\tIf not set, content is read from STDIN.\n" +
50 " --deletesourcefile\tDelete file passed to --sourcefile after index.\n" +
51 " --help\t\tPrint this usage message.\n";
53 Console
.WriteLine (usage
);
56 static void Main (String
[] args
)
60 string sourcefile
= null;
61 bool deletesourcefile
= false;
63 if (args
.Length
== 0 || Array
.IndexOf (args
, "--help") > -1) {
68 for (int i
= 0; i
< args
.Length
; i
++) {
73 if (i
+ 1 >= args
.Length
||
74 args
[i
+ 1].StartsWith ("--")) {
89 sourcefile
= args
[++i
];
91 case "--deletesourcefile":
92 deletesourcefile
= true;
100 if (uriStr
== null) {
101 Logger
.Log
.Error ("URI not specified!\n");
103 Environment
.Exit (1);
106 Uri uri
= new Uri (uriStr
, true);
107 if (uri
.Scheme
== Uri
.UriSchemeHttps
) {
108 // For security/privacy reasons, we don't index any
109 // SSL-encrypted pages.
110 Logger
.Log
.Error ("Indexing secure https:// URIs is not secure!");
111 Environment
.Exit (1);
114 // We don't index file: Uris. Silently exit.
118 // We *definitely* don't index mailto: Uris. Silently exit.
119 if (uri
.Scheme
== Uri
.UriSchemeMailto
)
124 indexable
= new Indexable (uri
);
125 indexable
.HitType
= "WebHistory";
126 indexable
.MimeType
= "text/html";
127 indexable
.Timestamp
= DateTime
.Now
;
130 indexable
.AddProperty (Property
.New ("dc:title", title
));
132 if (sourcefile
!= null) {
134 if (!File
.Exists (sourcefile
)) {
135 Logger
.Log
.Error ("sourcefile '{0}' does not exist!", sourcefile
);
136 Environment
.Exit (1);
139 indexable
.ContentUri
= UriFu
.PathToFileUri (sourcefile
);
140 indexable
.DeleteContent
= deletesourcefile
;
143 Stream stdin
= Console
.OpenStandardInput ();
145 Logger
.Log
.Error ("No sourcefile specified, and no standard input!\n");
147 Environment
.Exit (1);
150 indexable
.SetTextReader (new StreamReader (stdin
));
153 IndexingServiceRequest req
= new IndexingServiceRequest ();
157 Logger
.Log
.Info ("Indexing");
158 Logger
.Log
.Debug ("SendAsync");
160 Logger
.Log
.Debug ("Close");
162 Logger
.Log
.Debug ("Done");
163 } catch (Exception e
) {
164 Logger
.Log
.Error ("Indexing failed: {0}", e
);
166 // Still clean up after ourselves, even if we couldn't
167 // index the content.
168 if (deletesourcefile
)
169 File
.Delete (sourcefile
);
171 Environment
.Exit (1);