New e-d-s backend which indexes all local addressbooks and calendars.
[beagle.git] / Util / ImLog.cs
blob37ac7550e05a30c1515d740a4bd213bcd5638b0f
1 //
2 // ImLog.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Globalization;
31 using System.Text;
32 using System.Text.RegularExpressions;
33 using System.Xml;
35 namespace Beagle.Util {
public abstract class ImLog {

    // Callback shape for code that wants to be handed a log.
    public delegate void Sink (ImLog imLog);

    // One line of conversation: when it was said, by whom, and what.
    public class Utterance {
        public DateTime Timestamp;
        public String Who;
        public String Text;
    }

    // Path of the file holding the conversation, and the position of the
    // conversation inside it (-1 when no offset was supplied).
    public string LogFile;
    public long LogOffset;

    // Name of the IM client that wrote the log and of the IM protocol used.
    public string Protocol;
    public string Client;

    // StartTime and EndTime are widened by AddUtterance to cover every
    // utterance added so far.  Timestamp and Snippet are never written by
    // this class; subclasses and scanners fill them in.
    public DateTime StartTime;
    public DateTime EndTime;
    public DateTime Timestamp;
    public string Snippet;

    // The two parties of the conversation; filled in by the scanners.
    public string SpeakingTo;
    public string Identity;

    // Set once Load has completed for the first time.
    private bool isLoaded = false;

    // Every speaker seen by AddUtterance, stored as hash keys.
    private Hashtable speakerSet = new Hashtable ();

    // The utterances in the order in which they were added.
    private ArrayList utteranceList = new ArrayList ();

    //////////////////////////

    // Records where the conversation lives and who wrote it.
    protected ImLog (string client, string protocol, string file, long offset)
    {
        LogOffset = offset;
        LogFile = file;
        Protocol = protocol;
        Client = client;
    }

    // Same as above, with the offset set to -1.
    protected ImLog (string client, string protocol, string file) : this (client, protocol, file, -1)
    { }

    // The log file expressed as a URI.
    public Uri Uri {
        get {
            return UriFu.PathToFileUri (LogFile);
        }
    }

    // The snippet, cut down to its first 50 characters plus "..." when it
    // is longer than that.  A null snippet is returned as null.
    public string EllipsizedSnippet {
        get {
            // FIXME: We should try to avoid breaking mid-word
            if (Snippet == null || Snippet.Length <= 50)
                return Snippet;

            return Snippet.Substring (0, 50) + "...";
        }
    }

    // Everyone who has spoken in the utterances added so far.
    public ICollection Speakers {
        get {
            return speakerSet.Keys;
        }
    }

    // The utterances of this conversation.  The first access calls Load;
    // if Load throws, the next access will try again.
    public IList Utterances {
        get {
            if (isLoaded)
                return utteranceList;

            Load ();
            isLoaded = true;
            return utteranceList;
        }
    }

    // The utterance list as it currently stands, without triggering Load.
    protected IList RawUtterances {
        get {
            return utteranceList;
        }
    }

    // Appends a new utterance, widens StartTime/EndTime so that they
    // include its timestamp, and remembers the speaker.
    protected void AddUtterance (DateTime timestamp,
                                 String who,
                                 String text)
    {
        Utterance added = new Utterance ();
        added.Text = text;
        added.Who = who;
        added.Timestamp = timestamp;

        // A zero tick count means the bound has not been set yet.
        if (StartTime.Ticks == 0 || timestamp < StartTime)
            StartTime = timestamp;

        if (EndTime.Ticks == 0 || timestamp > EndTime)
            EndTime = timestamp;

        speakerSet [who] = true;

        utteranceList.Add (added);
    }

    // Glues another line of text, preceded by a newline, onto the most
    // recently added utterance.  Does nothing when there is none.
    protected void AppendToPreviousUtterance (string text)
    {
        int count = utteranceList.Count;
        if (count == 0)
            return;

        Utterance last = (Utterance) utteranceList [count - 1];
        last.Text = last.Text + "\n" + text;
    }

    // Forgets every utterance added so far.
    protected void ClearUtterances ()
    {
        utteranceList.Clear ();
    }

    // Implemented by subclasses to read the utterances from LogFile.
    protected abstract void Load ();
}
147 ///////////////////////////////////////////////////////////////////////////////
150 // Gaim Logs
153 public class GaimLog : ImLog {
// Removes every "<...>" span from the line.  A "<" that has no
// closing ">" after it is kept, together with the rest of the line.
private static string StripTags (string line)
{
    int open = line.IndexOf ('<');

    // Nothing that could be a tag: hand the line back untouched.
    if (open == -1)
        return line;

    StringBuilder kept = new StringBuilder ();
    int pos = 0;

    while (pos < line.Length) {
        int close = -1;
        if (open != -1)
            close = line.IndexOf ('>', open);

        // No further complete tag: keep the remainder verbatim.
        if (close == -1) {
            kept.Append (line, pos, line.Length - pos);
            break;
        }

        // Keep the text in front of the tag, then skip past the tag.
        kept.Append (line, pos, open - pos);
        pos = close + 1;
        open = line.IndexOf ('<', pos);
    }

    return kept.ToString ();
}
// True when the "--- New Conv" marker begins within the first five
// characters of the line.
private static bool IsNewConversation (string line)
{
    int markerAt = line.IndexOf ("--- New Conv");
    if (markerAt < 0)
        return false;

    return markerAt < 5;
}
// Matches the date part of a conversation header.  The groups are:
// 1 = month name, 2 = day, 3 = hour, 4 = minute, 5 = second, 6 = year.
private static readonly string REGEX_DATE =
    "Conversation @ \\S+\\s+(\\S+)\\s+(\\d+)\\s+(\\d+):(\\d+):(\\d+)\\s+(\\d+)";

private static readonly Regex dateRegex = new Regex (REGEX_DATE,
                                                     RegexOptions.IgnoreCase | RegexOptions.Compiled);

// A culture-independent format table, used only for its abbreviated
// month names.
private static readonly DateTimeFormatInfo dtInfo = new DateTimeFormatInfo ();

// Extracts the start time from a conversation header line.
//
// Returns the parsed time, or a default-constructed DateTime when the
// line does not match, names an unknown month, or describes a date that
// does not exist.  Every failure is reported on the console; none of
// them throws.
private static DateTime NewConversationTime (string line)
{
    Match m = dateRegex.Match (line);
    if (m.Success) {
        try {
            String monthName = m.Groups [1].ToString ();
            int day = int.Parse (m.Groups [2].ToString (), CultureInfo.InvariantCulture);
            int hr  = int.Parse (m.Groups [3].ToString (), CultureInfo.InvariantCulture);
            int min = int.Parse (m.Groups [4].ToString (), CultureInfo.InvariantCulture);
            int sec = int.Parse (m.Groups [5].ToString (), CultureInfo.InvariantCulture);
            int yr  = int.Parse (m.Groups [6].ToString (), CultureInfo.InvariantCulture);

            // The regex ignores case, so the month lookup does too.
            int mo = -1;
            for (int i = 1; i <= 12; ++i) {
                if (String.Compare (monthName,
                                    dtInfo.GetAbbreviatedMonthName (i),
                                    true,
                                    CultureInfo.InvariantCulture) == 0) {
                    mo = i;
                    break;
                }
            }

            if (mo != -1)
                return new DateTime (yr, mo, day, hr, min, sec);
        } catch (FormatException) {
            // Digits that int.Parse does not accept: report below.
        } catch (OverflowException) {
            // A number too large for an int: report below.
        } catch (ArgumentOutOfRangeException) {
            // A date or time that does not exist (e.g. "Feb 30" or
            // hour 25): report below.
        }
    }

    Console.WriteLine ("Failed on '{0}'", line);
    return new DateTime ();
}
237 ///////////////////////////////////////
// Walks the utterances read so far and keeps, as Snippet, the trimmed
// text with the highest word count seen.  Returns true as soon as an
// utterance with more than three words is found, and false if the list
// runs out first.
private bool TrySnippet ()
{
    IList candidates = RawUtterances;
    int mostWords = 0;

    for (int n = 0; n < candidates.Count; ++n) {
        Utterance candidate = (Utterance) candidates [n];
        string text = candidate.Text.Trim ();

        // NOTE(review): the second argument presumably caps the count;
        // confirm against StringFu.CountWords.
        int words = StringFu.CountWords (text, 15);

        if (words > mostWords) {
            mostWords = words;
            Snippet = text;
        }

        if (words > 3)
            return true;
    }

    return false;
}
// FIXME: The ending timestamp in the log will be inaccurate
// until Load is called... before that, the ending time will
// come from the timestamp of the snippet-line.

// Reads just enough of the log for TrySnippet to settle on a snippet.
private void SetSnippet ()
{
    LoadTerminator stopOnceSnippetFound = new LoadTerminator (TrySnippet);
    LoadWithTermination (stopOnceSnippetFound);
}

///////////////////////////////////////

// A conversation that starts at the given offset inside the file.
// The snippet is picked immediately, which reads from the file.
private GaimLog (string protocol, string file, long offset)
    : base ("gaim", protocol, file, offset)
{
    SetSnippet ();
}

// A conversation that occupies the file from its beginning.
// The snippet is picked immediately, which reads from the file.
private GaimLog (string protocol, string file)
    : base ("gaim", protocol, file)
{
    SetSnippet ();
}
// Feeds one line of the log into the utterance list.
//
// A line of the form "(h:m:s) alias: text" becomes a new utterance.
// Any line that does not start with a usable "(h:m:s)" prefix is
// treated as a continuation and appended to the previous utterance.
// A line with a valid prefix but no ':' after it is dropped.
//
// Return true if a new utterance is now available, and false
// otherwise (the previous utterance was extended, or the line was
// dropped).
private bool ProcessLine (string line)
{
    if (! line.StartsWith ("(")) {
        AppendToPreviousUtterance (line);
        return false;
    }

    int j = line.IndexOf (')');
    if (j == -1) {
        AppendToPreviousUtterance (line);
        return false;
    }

    string whenStr = line.Substring (1, j-1);
    string[] whenSplit = whenStr.Split (':');
    int hour = -1, minute = -1, second = -1;
    bool parsed = false;

    if (whenSplit.Length >= 3) {
        try {
            hour = int.Parse (whenSplit [0]);
            minute = int.Parse (whenSplit [1]);
            second = int.Parse (whenSplit [2]);
            parsed = true;
        } catch (FormatException) {
            // Not numbers: handled below.
        } catch (OverflowException) {
            // Numbers too large for an int: handled below.
        }
    }

    // If something goes wrong, this line probably spills over from
    // the previous one.  That includes parenthesised numbers that are
    // not a time of day (e.g. "(25:61:00)"), which would otherwise
    // make the DateTime constructor below throw.
    if (! parsed
        || hour < 0 || hour > 23
        || minute < 0 || minute > 59
        || second < 0 || second > 59) {
        AppendToPreviousUtterance (line);
        return false;
    }

    line = line.Substring (j+1).Trim ();

    // FIXME: this is wrong --- the log line carries only a time of
    // day, so the date is borrowed from StartTime.
    DateTime when = new DateTime (StartTime.Year,
                                  StartTime.Month,
                                  StartTime.Day,
                                  hour, minute, second);

    // Try to deal with time wrapping around.
    while (when < EndTime)
        when = when.AddDays (1);

    int i = line.IndexOf (':');
    if (i == -1)
        return false;
    string alias = line.Substring (0, i);
    string text = line.Substring (i+1).Trim ();

    AddUtterance (when, alias, text);

    return true;
}
// Asked after every new utterance; returning true stops the load.
protected delegate bool LoadTerminator ();

// Discards whatever was read before and reads the whole conversation.
protected override void Load ()
{
    ClearUtterances ();
    LoadWithTermination (null);
}

// Reads the conversation that starts at LogOffset in LogFile, feeding
// each line to ProcessLine.  Reading stops at the end of the file, at
// the next "new conversation" marker, or as soon as the terminator
// (which may be null) returns true after a new utterance.
//
// If the file cannot be opened, the failure is reported on the console
// and nothing is read.  The file is closed on every path out of this
// method, including when reading throws.
protected void LoadWithTermination (LoadTerminator terminator)
{
    FileStream fs = null;
    StreamReader sr = null;

    try {
        try {
            fs = new FileStream (LogFile,
                                 FileMode.Open,
                                 FileAccess.Read,
                                 FileShare.Read);
            if (LogOffset > 0)
                fs.Seek (LogOffset, SeekOrigin.Begin);
            sr = new StreamReader (fs);
        } catch (Exception e) {
            // If we can't open the file, just fail.
            Console.WriteLine ("Could not open '{0}' (offset={1})", LogFile, LogOffset);
            Console.WriteLine (e);
            return;
        }

        // The first line is never turned into an utterance; it is
        // only inspected to decide whether the log is HTML.
        string line = sr.ReadLine ();
        if (line == null)
            return;

        // Could that line ever start w/ < in a non-html log?
        // I hope not!
        bool isHtml = line.Length > 0 && line [0] == '<';

        while ((line = sr.ReadLine ()) != null) {
            if (isHtml)
                line = StripTags (line);

            if (IsNewConversation (line))
                break;

            // Only check termination when a new Utterance has become
            // available.
            if (ProcessLine (line)
                && terminator != null
                && terminator ())
                break;
        }
    } finally {
        if (sr != null)
            sr.Close ();
        if (fs != null)
            fs.Close ();
    }
}
// Adds to 'array' the single conversation held by a log file whose
// name (without extension) is its start time in "yyyy-MM-dd.HHmmss"
// form and whose path ends in <protocol>/<identity>/<buddy>/<file>.
//
// Files whose name cannot be parsed, or that do not sit three
// directories deep, are skipped with a warning.
private static void ScanNewStyleLog (FileInfo file, ArrayList array)
{
    DirectoryInfo buddyDir = file.Directory;
    DirectoryInfo identityDir = buddyDir != null ? buddyDir.Parent : null;
    DirectoryInfo protocolDir = identityDir != null ? identityDir.Parent : null;
    if (protocolDir == null) {
        Logger.Log.Warn ("IMLog: Unexpected directory layout for '{0}', ignoring.", file.FullName);
        return;
    }

    // Validate the name before constructing the log: the GaimLog
    // constructor already reads from the file to pick a snippet.
    string startStr = Path.GetFileNameWithoutExtension (file.Name);
    DateTime startTime;
    try {
        // The file name is machine-generated, so it must not be
        // interpreted through the user's calendar or locale.
        startTime = DateTime.ParseExact (startStr,
                                         "yyyy-MM-dd.HHmmss",
                                         CultureInfo.InvariantCulture);
    } catch (FormatException) {
        Logger.Log.Warn ("IMLog: Could not parse date/time from '{0}', ignoring.", startStr);
        return;
    }

    // protocolDir.Name is the name of the current protocol (ex. aim)
    ImLog log = new GaimLog (protocolDir.Name, file.FullName);

    log.StartTime = startTime;
    log.Timestamp = file.LastWriteTime;

    // Gaim likes to represent many characters in hex-escaped %xx form
    log.SpeakingTo = StringFu.HexUnescape (buddyDir.Name);
    log.Identity = StringFu.HexUnescape (identityDir.Name);

    array.Add (log);
}
// Adds to 'array' one log per conversation found in a file that holds
// several conversations back to back.  Each conversation is identified
// by the offset of its "new conversation" header line.
//
// An empty file yields no logs.  The file is closed on every path out
// of this method.
//
// NOTE(review): offsets are accumulated as "characters in the line
// plus one", but LoadWithTermination hands them to FileStream.Seek as
// byte positions.  The two agree only for single-byte text with
// one-byte line endings; multi-byte characters or CRLF line endings
// will make later conversations start at the wrong place.
private static void ScanOldStyleLog (FileInfo file, ArrayList array)
{
    using (Stream stream = new FileStream (file.FullName,
                                           FileMode.Open,
                                           FileAccess.Read,
                                           FileShare.Read))
    using (StreamReader sr = new StreamReader (stream)) {

        string speakingTo = Path.GetFileNameWithoutExtension (file.Name);

        string line = sr.ReadLine ();
        if (line == null)
            return; // empty file: nothing to scan

        bool isHtml = line.ToLower (CultureInfo.InvariantCulture).StartsWith ("<html>");
        long offset = line.Length + 1;

        while ((line = sr.ReadLine ()) != null) {
            // Measure the raw line before any tags are stripped.
            long newOffset = offset + line.Length + 1;
            if (isHtml)
                line = StripTags (line);
            if (IsNewConversation (line)) {
                ImLog log = new GaimLog ("aim", file.FullName, offset); //FIXME: protocol
                log.StartTime = NewConversationTime (line);
                log.Identity = "_OldGaim_"; // FIXME: parse a few lines of the log to figure this out
                log.SpeakingTo = speakingTo;

                array.Add (log);
            }

            offset = newOffset;
        }
    }
}
// Returns the conversations found in the given log file.  The scanner
// is picked by extension: ".txt" and ".html" go to the new-style
// scanner, ".log" to the old-style one.  Any other extension yields
// an empty collection.
public static ICollection ScanLog (FileInfo file)
{
    ArrayList found = new ArrayList ();

    switch (file.Extension) {
    case ".txt":
    case ".html":
        ScanNewStyleLog (file, found);
        break;

    case ".log":
        ScanOldStyleLog (file, found);
        break;
    }

    return found;
}
462 ///////////////////////////////////////////////////////////////////////////////
465 // Kopete Logs
public class KopeteLog : ImLog {

    private KopeteLog (string protocol, string file) : base ("kopete", protocol, file)
    {
    }

    // Year and month come from the log's "date" element; the day and the
    // time of day come from each message's "time" attribute.
    private const string date_format = "yyyy M d H:m:s";

    // Builds the first day of the month named by the two attribute
    // strings.  Returns false, leaving 'result' untouched, when either
    // string is missing, is not a number, or is out of range.
    private static bool TryParseBaseDate (string year, string month, ref DateTime result)
    {
        try {
            result = new DateTime (Convert.ToInt32 (year), Convert.ToInt32 (month), 1);
            return true;
        } catch (FormatException) {
            return false;
        } catch (OverflowException) {
            return false;
        } catch (ArgumentOutOfRangeException) {
            return false;
        }
    }

    // Reads every "msg" element of the XML log into the utterance list.
    //
    // If the file cannot be opened, the failure is reported on the
    // console and nothing is read.  Messages whose timestamp cannot be
    // parsed, or that name no speaker, are skipped.  The file is closed
    // on every path out of this method, including when the XML reader
    // throws.
    protected override void Load ()
    {
        ClearUtterances ();

        XmlReader reader;
        DateTime base_date = DateTime.MinValue;

        try {
            reader = new XmlTextReader (new FileStream (LogFile,
                                                        FileMode.Open,
                                                        FileAccess.Read,
                                                        FileShare.Read));
        } catch (Exception e) {
            Console.WriteLine ("Could not open '{0}'", LogFile);
            Console.WriteLine (e);
            return;
        }

        try {
            while (reader.Read ()) {
                if (reader.NodeType != XmlNodeType.Element)
                    continue;

                switch (reader.Name) {
                case "date":
                    // NOTE(review): the day argument was missing from
                    // the reviewed listing.  1 is used because the day
                    // of base_date is never read; each message supplies
                    // its own day through its "time" attribute.
                    if (! TryParseBaseDate (reader.GetAttribute ("year"),
                                            reader.GetAttribute ("month"),
                                            ref base_date))
                        Logger.Log.Error ("Couldn't parse Kopete date header in: {0}", LogFile);
                    break;

                case "msg":
                    // Parse the timestamp of the message
                    string timestamp = String.Format (CultureInfo.InvariantCulture,
                                                      "{0} {1} {2}",
                                                      base_date.Year,
                                                      base_date.Month,
                                                      reader.GetAttribute ("time"));

                    DateTime msg_date = DateTime.MinValue;

                    try {
                        // The ':' in date_format stands for the
                        // culture's time separator, so the log must be
                        // parsed with the invariant culture.
                        msg_date = DateTime.ParseExact (timestamp,
                                                        date_format,
                                                        CultureInfo.InvariantCulture);
                    } catch (FormatException) {
                        Logger.Log.Error ("Couldn't parse Kopete timestamp: {0}", timestamp);
                        break;
                    }

                    string who = reader.GetAttribute ("nick");
                    if (who == null || who == "")
                        who = reader.GetAttribute ("from");
                    if (who == null || who == "")
                        break;

                    // Advance to the text node for the actual message.
                    // An empty element (<msg ... />) has no such node;
                    // advancing anyway would swallow whatever follows.
                    string text = "";
                    if (! reader.IsEmptyElement) {
                        reader.Read ();
                        text = reader.Value;
                    }

                    AddUtterance (msg_date, who, text);
                    break;
                }
            }
        } finally {
            // Closing the reader also closes the stream it was built on.
            reader.Close ();
        }
    }

    // Returns the single log described by a Kopete history file, or an
    // empty collection (after a warning) when the file does not sit in
    // the expected <protocol dir>/<identity dir>/ layout.
    public static ICollection ScanLog (FileInfo file)
    {
        ArrayList array = new ArrayList ();

        // FIXME: Artificially split logs into conversations depending on the
        // amount of time elapsed between messages?

        // Figure out the protocol from the name of the directory that
        // contains the file's own directory: drop its last eight
        // characters and lower-case the rest.
        // NOTE(review): the eight characters are presumably a "Protocol"
        // suffix -- confirm against Kopete's directory naming.
        DirectoryInfo identityDir = file.Directory;
        DirectoryInfo protocolDir = identityDir != null ? identityDir.Parent : null;
        if (protocolDir == null || protocolDir.Name.Length < 8) {
            Logger.Log.Warn ("IMLog: Unexpected Kopete log location '{0}', ignoring.", file.FullName);
            return array;
        }

        // Lower-case with the invariant culture so that the result does
        // not depend on the user's locale.
        string protocolDirName = protocolDir.Name;
        string protocol = protocolDirName.Substring (0, protocolDirName.Length - 8).ToLower (CultureInfo.InvariantCulture);
        string filename = Path.GetFileNameWithoutExtension (file.Name);

        ImLog log = new KopeteLog (protocol, file.FullName);

        log.Timestamp = file.LastWriteTime;
        log.Identity = identityDir.Name;

        // FIXME: This is not safe for all kinds of file/screennames
        // Everything before the last '.' is the other party; a name
        // without any '.' is used whole.
        int lastDot = filename.LastIndexOf ('.');
        log.SpeakingTo = lastDot == -1 ? filename : filename.Substring (0, lastDot);

        array.Add (log);

        return array;
    }
}