Soon all this datetime clumsiness will be over.
[beagle.git] / beagled / IndexWebContent.cs
blob1494489b2f01040e90f468b4f2280db79e917ff5
1 //
2 // IndexWebContent.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
32 using Beagle;
33 using Beagle.Daemon;
34 using Beagle.Util;
//
// Command-line entry point for "beagle-index-url".
//
// Builds a single Indexable describing a web page (URL, optional title, and
// content taken either from a file or from standard input) and submits it
// through an IndexingServiceRequest.
//
class IndexWebContentTool {

	// Writes the usage/help text for the tool to standard output.
	static void PrintUsage ()
	{
		string usage =
			"beagle-index-url: Index web page content using the Beagle Search Engine.\n" +
			"Web page: http://www.gnome.org/projects/beagle\n" +
			"Copyright (C) 2004-2005 Novell, Inc.\n\n";

		usage +=
			"Usage: beagle-index-url <OPTIONS>\n\n" +
			"Options:\n" +
			" --url URL\t\tURL for the web page being indexed.\n" +
			" --title TITLE\t\tTitle for the web page.\n" +
			" --sourcefile PATH\tFile containing content to index.\n" +
			"\t\t\tIf not set, content is read from STDIN.\n" +
			" --deletesourcefile\tDelete file passed to --sourcefile after index.\n" +
			" --help\t\tPrint this usage message.\n";

		Console.WriteLine (usage);
	}

	// Parses the command line, validates the target URI, and sends the page
	// to the indexing service.
	//
	// Exits with status 1 on usage errors, on an unparsable or https:// URI,
	// on a missing source file, and when the indexing request fails.
	// Returns silently (without indexing) for file: and mailto: URIs.
	static void Main (String[] args)
	{
		string uriStr = null;
		string title = null;
		string sourcefile = null;
		bool deletesourcefile = false;

		// No arguments, or --help anywhere on the command line: show usage
		// and stop. Because this check covers every position, the parsing
		// loop below never needs its own --help case.
		if (args.Length == 0 || Array.IndexOf (args, "--help") > -1) {
			PrintUsage ();
			Environment.Exit (1);
		}

		for (int i = 0; i < args.Length; i++) {
			// Options that take a value must be followed by an argument
			// that does not itself look like another option.
			switch (args [i]) {
			case "--url":
			case "--title":
			case "--sourcefile":
				if (i + 1 >= args.Length ||
				    args [i + 1].StartsWith ("--")) {
					PrintUsage ();
					Environment.Exit (1);
				}
				break;
			}

			// Consume the option (and its value, where it has one).
			switch (args [i]) {
			case "--url":
				uriStr = args [++i];
				break;
			case "--title":
				title = args [++i];
				break;
			case "--sourcefile":
				sourcefile = args [++i];
				break;
			case "--deletesourcefile":
				deletesourcefile = true;
				break;
			}
		}

		if (uriStr == null) {
			Logger.Log.Error ("URI not specified!\n");
			PrintUsage ();
			Environment.Exit (1);
		}

		// A malformed --url value makes the Uri constructor throw; report it
		// like the other input errors instead of dying with an unhandled
		// exception.
		Uri uri;
		try {
			uri = new Uri (uriStr, true);
		} catch (UriFormatException) {
			Logger.Log.Error ("Invalid URI '{0}'", uriStr);
			Environment.Exit (1);
			return; // not reached; satisfies definite assignment of 'uri'
		}

		if (uri.Scheme == Uri.UriSchemeHttps) {
			// For security/privacy reasons, we don't index any
			// SSL-encrypted pages.
			Logger.Log.Error ("Indexing secure https:// URIs is not secure!");
			Environment.Exit (1);
		}

		// We don't index file: Uris. Silently exit.
		if (uri.IsFile)
			return;

		// We *definitely* don't index mailto: Uris. Silently exit.
		if (uri.Scheme == Uri.UriSchemeMailto)
			return;

		Indexable indexable;

		indexable = new Indexable (uri);
		indexable.HitType = "WebHistory";
		indexable.MimeType = "text/html";
		indexable.Timestamp = DateTime.Now;

		if (title != null)
			indexable.AddProperty (Property.New ("dc:title", title));

		if (sourcefile != null) {

			if (!File.Exists (sourcefile)) {
				Logger.Log.Error ("sourcefile '{0}' does not exist!", sourcefile);
				Environment.Exit (1);
			}

			// Content comes from the file; the indexable carries the
			// delete-after-index flag along with it.
			indexable.ContentUri = UriFu.PathToFileUri (sourcefile);
			indexable.DeleteContent = deletesourcefile;

		} else {
			// No --sourcefile: the page content is read from standard input.
			Stream stdin = Console.OpenStandardInput ();
			if (stdin == null) {
				Logger.Log.Error ("No sourcefile specified, and no standard input!\n");
				PrintUsage ();
				Environment.Exit (1);
			}

			indexable.SetTextReader (new StreamReader (stdin));
		}

		IndexingServiceRequest req = new IndexingServiceRequest ();
		req.Add (indexable);

		try {
			Logger.Log.Info ("Indexing");
			Logger.Log.Debug ("SendAsync");
			req.SendAsync ();
			Logger.Log.Debug ("Close");
			req.Close ();
			Logger.Log.Debug ("Done");
		} catch (Exception e) {
			Logger.Log.Error ("Indexing failed: {0}", e);

			// Still clean up after ourselves, even if we couldn't
			// index the content. sourcefile is null when
			// --deletesourcefile was given without --sourcefile, and
			// File.Delete (null) would throw from inside this handler.
			if (deletesourcefile && sourcefile != null)
				File.Delete (sourcefile);

			Environment.Exit (1);
		}
	}
}