4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 using System
.Collections
;
30 using System
.Globalization
;
32 using System
.Text
.RegularExpressions
;
35 namespace Beagle
.Util
{
37 public abstract class ImLog
{
// Callback signature that receives a single ImLog. No subscriber or invocation
// of this delegate is visible in this excerpt.
39 public delegate void Sink (ImLog imLog
);
// Instance state of one IM conversation log. The notes below only describe uses
// that are visible in this excerpt.

// NOTE(review): no read or write of 'loaded' is visible here — presumably it guards
// lazy loading of the utterances; confirm against the full file.
41 private bool loaded
= false;
// Path of the log file: opened by the loaders and passed to UriFu.PathToFileUri.
43 public string LogFile
;
// Position handed to FileStream.Seek before a Gaim log is read.
44 public long LogOffset
;
// Protocol name; the scanners pass it to the constructor (e.g. "aim").
46 public string Protocol
;
// AddUtterance pulls StartTime back to the earliest utterance timestamp it sees and
// tests EndTime against later ones; the scanners also set StartTime directly.
49 public DateTime StartTime
;
50 public DateTime EndTime
;
// The scanners set this to the log file's LastWriteTime.
51 public DateTime Timestamp
;
// Representative text for the log; GaimLog.TrySnippet assigns it.
52 public string Snippet
;
// Names of the two parties, filled in by the scanners from directory/file names.
54 public string SpeakingTo
;
55 public string Identity
;
// Speaker name -> true; only the keys are used (exposed through Speakers).
57 private Hashtable speakerHash
= new Hashtable ();
// One message of a conversation. Only Timestamp is declared in this excerpt, but
// AppendToPreviousUtterance and GaimLog.TrySnippet also use a Text member, so the
// remaining members are presumably declared on lines not shown here.
59 public class Utterance
{
60 public DateTime Timestamp
;
// Utterance objects in the order AddUtterance appended them; exposed to subclasses
// through RawUtterances.
64 private ArrayList utterances
= new ArrayList ();
66 //////////////////////////
// Constructor taking the client name, the protocol, the log file path and an offset.
// NOTE(review): the body is not visible in this excerpt — presumably it stores the
// arguments in the corresponding fields (file -> LogFile, offset -> LogOffset); confirm.
68 protected ImLog (string client
, string protocol
, string file
, long offset
)
// Convenience constructor: forwards to the four-argument constructor, passing -1
// as the offset.
76 protected ImLog (string client
, string protocol
, string file
) : this (client
, protocol
, file
, -1)
// Getter of a property whose header is not visible in this excerpt: returns the
// result of UriFu.PathToFileUri for LogFile (presumably a file URI; UriFu is
// defined elsewhere).
80 get { return UriFu.PathToFileUri (this.LogFile); }
/// <summary>
/// The snippet shortened for display: a snippet longer than 50 characters is cut
/// to its first 50 characters followed by "..."; a shorter or null snippet is
/// returned as is.
/// </summary>
public string EllipsizedSnippet {
	get {
		const int maxLength = 50;
		string text = Snippet;

		if (text == null || text.Length <= maxLength)
			return text;

		// FIXME: We should try to avoid breaking mid-word
		return text.Substring (0, maxLength) + "...";
	}
}
/// <summary>
/// Names of everyone who has spoken in this log, i.e. the keys of the speaker table.
/// </summary>
public ICollection Speakers {
	get {
		ICollection names = speakerHash.Keys;
		return names;
	}
}
// Public view of the log's utterances.
// NOTE(review): the accessor body is not visible in this excerpt — presumably it
// triggers Load on first access before returning the list; confirm.
97 public IList Utterances
{
/// <summary>
/// The backing utterance list itself, handed to subclasses without any further
/// processing.
/// </summary>
protected IList RawUtterances {
	get {
		IList backing = utterances;
		return backing;
	}
}
// Records a new utterance: creates an Utterance stamped with 'timestamp', adjusts
// the log's StartTime/EndTime, registers the speaker and appends the utterance to
// the list. Callers in this file pass (timestamp, speaker name, message text).
// NOTE(review): the rest of the parameter list and some body lines are not visible
// in this excerpt.
111 protected void AddUtterance (DateTime timestamp
,
115 Utterance utt
= new Utterance ();
116 utt
.Timestamp
= timestamp
;
// An unset (zero-tick) StartTime, or one later than this utterance, is pulled
// back to this timestamp.
120 if (StartTime
.Ticks
== 0 || StartTime
> timestamp
)
121 StartTime
= timestamp
;
// Mirror-image test for EndTime; the statement it guards is not visible here
// (presumably EndTime = timestamp).
123 if (EndTime
.Ticks
== 0 || EndTime
< timestamp
)
// Register the speaker; only the hashtable's keys are used (see Speakers).
126 speakerHash
[who
] = true;
128 utterances
.Add (utt
);
/// <summary>
/// Appends <paramref name="text"/>, on a new line, to the text of the most recently
/// added utterance. Does nothing when no utterance has been added yet.
/// </summary>
/// <param name="text">Continuation text belonging to the previous utterance.</param>
protected void AppendToPreviousUtterance (string text)
{
	int lastIndex = utterances.Count - 1;
	if (lastIndex < 0)
		return;

	Utterance last = (Utterance) utterances [lastIndex];
	last.Text = last.Text + "\n" + text;
}
// NOTE(review): the body is not visible in this excerpt; going by the name it
// presumably empties the utterance list — confirm against the full file.
139 protected void ClearUtterances ()
// Implemented by each log format (GaimLog and KopeteLog in this file) to read the
// log file and report its messages through AddUtterance.
144 protected abstract void Load ();
147 ///////////////////////////////////////////////////////////////////////////////
153 public class GaimLog
: ImLog
{
// Builds a copy of 'line' without markup: text up to each '<' is copied into a
// StringBuilder and scanning continues using the position of the following '>'.
// NOTE(review): many lines of this method are missing from this excerpt (early
// return, declarations of i/j/k, loop advance), so the exact edge-case handling
// cannot be confirmed from here.
155 private static string StripTags (string line
)
157 int first
= line
.IndexOf ('<');
161 StringBuilder builder
= new StringBuilder ();
163 while (i
< line
.Length
) {
// Next tag opener at or after the current position.
167 j
= line
.IndexOf ('<', i
);
// Matching tag closer for the opener found at j.
175 k
= line
.IndexOf ('>', j
);
177 // If a "<" is unmatched, preserve it, and the
// Copy everything from i to the end of the line.
184 builder
.Append (line
, i
, line
.Length
- i
);
// Copy the text between the current position and the '<' at j.
188 builder
.Append (line
, i
, j
-i
);
193 return builder
.ToString ();
/// <summary>
/// Tells whether <paramref name="line"/> is a conversation separator, i.e. whether
/// the marker "--- New Conv" starts within the first five characters of the line.
/// </summary>
/// <param name="line">A log line (already stripped of tags by the callers in this file).</param>
/// <returns>true when the marker begins at index 0 through 4; false otherwise.</returns>
private static bool IsNewConversation (string line)
{
	// The marker is fixed ASCII text written by the IM client, so search for it
	// ordinally: the plain IndexOf (string) overload is culture-sensitive and can
	// give different answers depending on the current culture.
	int i = line.IndexOf ("--- New Conv", StringComparison.Ordinal);
	return 0 <= i && i < 5;
}
// Pattern for the date inside a conversation header line:
//   "Conversation @ <token> <month> <day> <hour>:<minute>:<second> <year>"
// Capture groups: 1 = month token (compared against abbreviated month names),
// 2 = day, 3 = hour, 4 = minute, 5 = second, 6 = year. The first token after '@'
// is skipped (presumably the weekday name).
private static readonly string REGEX_DATE =
	"Conversation @ \\S+\\s+(\\S+)\\s+(\\d+)\\s+(\\d+):(\\d+):(\\d+)\\s+(\\d+)";

// Compiled once and shared by every call; never reassigned, hence readonly.
private static readonly Regex dateRegex = new Regex (REGEX_DATE,
						     RegexOptions.IgnoreCase | RegexOptions.Compiled);

// The parameterless constructor yields culture-independent (invariant) format
// data, so the abbreviated month names are the same whatever the current
// culture is.
private static readonly DateTimeFormatInfo dtInfo = new DateTimeFormatInfo ();
// Extracts the date and time from a conversation header line using dateRegex.
// Visible outcomes: a DateTime built from the captured year, day, hour, minute and
// second together with the month number 'mo'; or, on the failure path, a message
// on the console and a default DateTime.
// NOTE(review): the lines that declare and assign 'mo' and that select between
// the two returns are not visible in this excerpt.
209 private static DateTime
NewConversationTime (string line
)
211 Match m
= dateRegex
.Match (line
);
213 // I'm sure there is an easier way to do this.
214 String monthName
= m
.Groups
[1].ToString ();
215 int day
= int.Parse (m
.Groups
[2].ToString ());
216 int hr
= int.Parse (m
.Groups
[3].ToString ());
217 int min
= int.Parse (m
.Groups
[4].ToString ());
218 int sec
= int.Parse (m
.Groups
[5].ToString ());
219 int yr
= int.Parse (m
.Groups
[6].ToString ());
// Map the month abbreviation to its number by comparing it with each of the
// twelve abbreviated month names from dtInfo.
222 for (int i
= 1; i
<= 12; ++i
) {
223 if (monthName
== dtInfo
.GetAbbreviatedMonthName (i
)) {
230 return new DateTime (yr
, mo
, day
, hr
, min
, sec
);
233 Console
.WriteLine ("Failed on '{0}'", line
);
234 return new DateTime ();
237 ///////////////////////////////////////
// Chooses the log's Snippet: walks the raw utterances and keeps the trimmed text
// whose word count, as reported by StringFu.CountWords (text, 15), is the highest
// seen so far.
// NOTE(review): the return statements are not visible in this excerpt. The method
// is passed to LoadWithTermination as a LoadTerminator, so its result presumably
// tells the loader whether it may stop early — confirm. The meaning of the second
// CountWords argument (15) is defined in StringFu, not here.
239 private bool TrySnippet ()
241 int best_word_count
= 0;
243 foreach (Utterance utt
in RawUtterances
) {
245 string possible_snippet
= utt
.Text
.Trim ();
247 int word_count
= StringFu
.CountWords (possible_snippet
, 15);
// Strictly greater: on a tie the earlier utterance stays the snippet.
248 if (word_count
> best_word_count
) {
249 Snippet
= possible_snippet
;
250 best_word_count
= word_count
;
// FIXME: Until Load is called, the ending timestamp recorded for the log is
// inaccurate: before that point the ending time is taken from the timestamp
// of the line used as the snippet.

/// <summary>
/// Runs LoadWithTermination with TrySnippet as the termination callback.
/// </summary>
private void SetSnippet ()
{
	LoadTerminator snippetCheck = new LoadTerminator (TrySnippet);
	LoadWithTermination (snippetCheck);
}
270 ///////////////////////////////////////
// Creates a Gaim log for a conversation that starts at 'offset' inside 'file'
// (ScanOldStyleLog uses this overload); "gaim" is passed to the base class as the
// client name.
// NOTE(review): the constructor body is not visible in this excerpt.
272 private GaimLog (string protocol
, string file
, long offset
) : base ("gaim", protocol
, file
, offset
)
// Creates a Gaim log covering a whole file (ScanNewStyleLog uses this overload);
// "gaim" is passed to the base class as the client name.
// NOTE(review): the constructor body is not visible in this excerpt.
277 private GaimLog (string protocol
, string file
) : base ("gaim", protocol
, file
)
// Parses one line of a Gaim log. A regular line has the shape
//   (hour:minute:second) alias: text
// and becomes a new utterance; anything that does not fit is appended to the
// previous utterance instead.
282 // Return true if a new utterance is now available,
283 // and false if the previous utterance was changed.
// NOTE(review): the return statements, the try/catch around the time parsing and
// some conditions are not visible in this excerpt.
284 private bool ProcessLine (string line
)
// No leading "(" means no timestamp: treat the line as a continuation.
286 if (! line
.StartsWith ("(")) {
287 AppendToPreviousUtterance (line
);
// The timestamp ends at the first ')'.
290 int j
= line
.IndexOf (')');
292 AppendToPreviousUtterance (line
);
// Text between the parentheses, split into hour / minute / second.
295 string whenStr
= line
.Substring (1, j
-1);
296 string[] whenSplit
= whenStr
.Split (':');
297 int hour
, minute
, second
;
299 hour
= int.Parse (whenSplit
[0]);
300 minute
= int.Parse (whenSplit
[1]);
301 second
= int.Parse (whenSplit
[2]);
303 // If something goes wrong, this line probably
304 // spills over from the previous one.
305 AppendToPreviousUtterance (line
);
// Drop the timestamp; what remains is "alias: text".
309 line
= line
.Substring (j
+1).Trim ();
311 // FIXME: this is wrong --- since we just get a time,
312 // the date gets set to 'now'
// NOTE(review): the visible code takes the year from StartTime rather than from
// the current date, so the FIXME above may be out of date — confirm.
313 DateTime when
= new DateTime (StartTime
.Year
,
316 hour
, minute
, second
);
318 // Try to deal with time wrapping around.
// A time earlier than the latest utterance so far is assumed to belong to a
// later day, so whole days are added until it is no longer before EndTime.
319 while (when
< EndTime
)
320 when
= when
.AddDays (1);
// The first ':' separates the speaker's alias from the message text.
322 int i
= line
.IndexOf (':');
325 string alias = line
.Substring (0, i
);
326 string text
= line
.Substring (i
+1).Trim ();
328 AddUtterance (when
, alias, text
);
// Callback type accepted by LoadWithTermination, which consults it after
// ProcessLine reports a new utterance. NOTE(review): how the bool result is acted
// on is not visible in this excerpt — presumably true stops the load; confirm.
333 protected delegate bool LoadTerminator ();
// Loads the log by calling LoadWithTermination with a null terminator, i.e.
// without any termination callback.
335 protected override void Load ()
338 LoadWithTermination (null);
// Reads the log file from LogOffset, strips tags from every line and feeds the
// lines to ProcessLine; 'terminator' (may be null) is consulted when ProcessLine
// reports a new utterance.
// NOTE(review): several lines are missing from this excerpt (declarations, the
// FileStream arguments, what happens after the catch block, the handling of a new
// conversation marker and of the terminator's result). No stream disposal is
// visible either — check whether fs/sr are closed in the full file.
341 protected void LoadWithTermination (LoadTerminator terminator
)
348 fs
= new FileStream (LogFile
,
// Position the stream at this conversation's start inside the file.
353 fs
.Seek (LogOffset
, SeekOrigin
.Begin
);
354 sr
= new StreamReader (fs
);
355 } catch (Exception e
) {
356 // If we can't open the file, just fail.
357 Console
.WriteLine ("Could not open '{0}' (offset={1})", LogFile
, LogOffset
);
358 Console
.WriteLine (e
);
362 line
= sr
.ReadLine (); // throw away first line
365 // Could the second line ever start w/ < in a non-html log?
// NOTE(review): 'line' is dereferenced without a visible null check; ReadLine
// returns null at end of stream — confirm an empty file is handled.
367 bool isHtml
= line
.Length
> 0 && line
[0] == '<';
369 while ((line
= sr
.ReadLine ()) != null) {
371 line
= StripTags (line
);
373 if (IsNewConversation (line
))
376 // Only check termination when a new Utterance has become
378 if (ProcessLine (line
)
379 && terminator
!= null
// Builds one GaimLog for a new-style log file, where the conversation's metadata
// is encoded in the path: the file name holds the start time, the containing
// directory the other party, its parent the local identity, and the directory
// two levels further up the protocol.
// NOTE(review): the ParseExact format string, the try keyword and the code that
// adds the log to 'array' are not visible in this excerpt.
389 private static void ScanNewStyleLog (FileInfo file
, ArrayList array
)
391 // file.Directory.Parent.Parent.Name is the name of the current protocol (ex. aim)
392 ImLog log
= new GaimLog (file
.Directory
.Parent
.Parent
.Name
, file
.FullName
);
// The file name without its extension is parsed as the start time.
394 string startStr
= Path
.GetFileNameWithoutExtension (file
.Name
);
// NOTE(review): the file name is machine-generated, yet it is parsed with
// CultureInfo.CurrentCulture; depending on the (not visible) format string,
// CultureInfo.InvariantCulture may be the safer choice — confirm.
396 log
.StartTime
= DateTime
.ParseExact (startStr
,
398 CultureInfo
.CurrentCulture
);
399 } catch (FormatException
) {
400 Logger
.Log
.Warn ("IMLog: Could not parse date/time from '{0}', ignoring.", startStr
);
404 log
.Timestamp
= file
.LastWriteTime
;
406 // Gaim likes to represent many characters in hex-escaped %xx form
407 log
.SpeakingTo
= StringFu
.HexUnescape (file
.Directory
.Name
);
408 log
.Identity
= StringFu
.HexUnescape (file
.Directory
.Parent
.Name
);
// Scans an old-style log file, which holds several conversations: every line that
// IsNewConversation recognizes starts a new GaimLog positioned at the running
// offset of that line. The other party's name is the file name without extension.
// NOTE(review): the declarations, the FileStream arguments, the update of
// 'offset' from 'newOffset', the code that adds logs to 'array' and any stream
// cleanup are not visible in this excerpt.
414 private static void ScanOldStyleLog (FileInfo file
, ArrayList array
)
417 stream
= new FileStream (file
.FullName
,
421 StreamReader sr
= new StreamReader (stream
);
425 string speakingTo
= Path
.GetFileNameWithoutExtension (file
.Name
);
427 line
= sr
.ReadLine ();
428 bool isHtml
= line
.ToLower ().StartsWith ("<html>");
// NOTE(review): offsets are computed as character count + 1 per line, which equals
// the byte position only for single-byte encodings with one-character line endings;
// the value is handed to the GaimLog constructor as a file offset — confirm.
429 offset
= line
.Length
+ 1;
431 while ((line
= sr
.ReadLine ()) != null) {
432 long newOffset
= offset
+ line
.Length
+ 1;
434 line
= StripTags (line
);
435 if (IsNewConversation (line
)) {
436 ImLog log
= new GaimLog ("aim", file
.FullName
, offset
); //FIXME: protocol
437 log
.StartTime
= NewConversationTime (line
);
438 log
.Identity
= "_OldGaim_"; // FIXME: parse a few lines of the log to figure this out
439 log
.SpeakingTo
= speakingTo
;
// Entry point for scanning a Gaim log file: dispatches on the file extension.
// ".txt" and ".html" files go to ScanNewStyleLog, ".log" files to ScanOldStyleLog;
// other extensions are not handled by the visible code. The scanners receive the
// ArrayList created here.
// NOTE(review): the return statement is not visible in this excerpt — presumably
// the list is returned; confirm.
451 public static ICollection
ScanLog (FileInfo file
)
453 ArrayList array
= new ArrayList ();
454 if (file
.Extension
== ".txt" || file
.Extension
== ".html")
455 ScanNewStyleLog (file
, array
);
456 else if (file
.Extension
== ".log")
457 ScanOldStyleLog (file
, array
);
462 ///////////////////////////////////////////////////////////////////////////////
468 public class KopeteLog
: ImLog
{
// Creates a Kopete log for 'file'; "kopete" is passed to the base class as the
// client name.
// NOTE(review): the constructor body is not visible in this excerpt.
470 private KopeteLog (string protocol
, string file
) : base ("kopete", protocol
, file
)
// Date/time pattern: year, month, day, then hour:minute:second, without
// zero-padding requirements.
// NOTE(review): its use is not visible in this excerpt — presumably it is the
// format passed to DateTime.ParseExact in Load; confirm.
474 private const string date_format
= "yyyy M d H:m:s";
// Reads a Kopete XML log with an XmlTextReader: walks the element nodes, takes a
// base date from "year"/"month" attributes, builds each message's timestamp
// string, resolves the speaker from the "nick" attribute (falling back to
// "from"), and reports the message through AddUtterance.
// NOTE(review): the case labels of the switch, the remaining arguments of several
// calls, the handling of parse failures / missing speakers and any reader cleanup
// are not visible in this excerpt.
476 protected override void Load ()
481 DateTime base_date
= DateTime
.MinValue
;
484 reader
= new XmlTextReader (new FileStream (LogFile
,
488 } catch (Exception e
) {
489 Console
.WriteLine ("Could not open '{0}'", LogFile
);
490 Console
.WriteLine (e
);
494 while (reader
.Read ()) {
// Only element nodes are of interest.
495 if (reader
.NodeType
!= XmlNodeType
.Element
)
498 switch (reader
.Name
) {
500 base_date
= new DateTime (Convert
.ToInt32 (reader
.GetAttribute ("year")),
501 Convert
.ToInt32 (reader
.GetAttribute ("month")),
506 // Parse the timestamp of the message
507 string timestamp
= String
.Format ("{0} {1} {2}",
510 reader
.GetAttribute ("time"));
512 DateTime msg_date
= DateTime
.MinValue
;
515 msg_date
= DateTime
.ParseExact (timestamp
,
518 } catch (Exception ex
) {
519 Logger
.Log
.Error ("Couldn't parse Kopete timestamp: {0}", timestamp
);
// Prefer the "nick" attribute as the speaker name and fall back to "from".
523 string who
= reader
.GetAttribute ("nick");
524 if (who
== null || who
== "")
525 who
= reader
.GetAttribute ("from");
526 if (who
== null || who
== "")
529 // Advance to the text node for the actual message
532 AddUtterance (msg_date
, who
, reader
.Value
);
540 public static ICollection
ScanLog (FileInfo file
)
542 ArrayList array
= new ArrayList ();
544 // FIXME: Artificially split logs into conversations depending on the
545 // amount of time elapsed between messages?
547 // Figure out the protocol from the parent.parent foldername
548 string protocol
= file
.Directory
.Parent
.Name
.Substring (0, file
.Directory
.Parent
.Name
.Length
- 8).ToLower ().ToLower ();
549 string filename
= Path
.GetFileNameWithoutExtension (file
.Name
);
551 ImLog log
= new KopeteLog (protocol
, file
.FullName
);
553 log
.Timestamp
= file
.LastWriteTime
;
554 log
.Identity
= file
.Directory
.Name
;
556 // FIXME: This is not safe for all kinds of file/screennames
557 log
.SpeakingTo
= filename
.Substring (0, filename
.LastIndexOf ('.'));