4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 using System
.Collections
;
30 using System
.Globalization
;
32 using System
.Text
.RegularExpressions
;
34 using Mono
.Unix
.Native
;
36 namespace Beagle
.Util
{
38 public enum ImClient
{
43 public abstract class ImLog
{
// Callback signature that receives a single ImLog instance.
45 public delegate void Sink (ImLog imLog
);
// Reader the log text is pulled from; GaimLog.Load reads it line by line.
49 public TextReader TextReader
;
// Protocol name; the GaimLog and KopeteLog constructors derive it from directory names.
50 public string Protocol
;
// Start of the conversation. AddUtterance lowers it to an utterance's timestamp when it
// is still unset (Ticks == 0) or later than that timestamp.
52 public DateTime StartTime
;
// End of the conversation; AddUtterance and GaimLog.ProcessLine compare timestamps against it.
53 public DateTime EndTime
;
// Name of the remote party; set by the subclass constructors.
55 public string SpeakingTo
;
// Name of the local account; set by the subclass constructors.
56 public string Identity
;
// Used as a set: AddUtterance stores `true` under each speaker name, and the
// Speakers property exposes the keys.
58 private Hashtable speakerHash
= new Hashtable ();
// One message of a conversation.
60 public class Utterance
{
// Timestamp in the long form produced by NativeConvert.FromDateTime
// (presumably Unix time_t seconds -- confirm against Mono.Unix.Native).
61 private long timestamp
;
// DateTime view of the stored value; both accessors convert through NativeConvert.
63 public DateTime Timestamp
{
64 get { return NativeConvert.ToDateTime (timestamp); }
65 set { timestamp = NativeConvert.FromDateTime (value); }
// Utterance objects in the order AddUtterance appended them.
71 private ArrayList utterances
= new ArrayList ();
73 //////////////////////////
75 protected ImLog (string client
, FileInfo file
, TextReader reader
)
// Names of the speakers registered by AddUtterance (the keys of speakerHash).
82 public ICollection Speakers
{
83 get { return speakerHash.Keys; }
// The utterances recorded so far. This returns the internal list itself, not a copy.
86 public IList Utterances
{
87 get { return utterances; }
// Builds an Utterance from `timestamp` and the trimmed `text`, updates StartTime,
// registers `who` in speakerHash and appends the utterance to `utterances`.
90 protected void AddUtterance (DateTime timestamp
, string who
, string text
)
92 Utterance utt
= new Utterance ();
93 utt
.Timestamp
= timestamp
;
// Surrounding whitespace is dropped before the text is stored.
95 utt
.Text
= text
.Trim ();
// Lower StartTime when it is unset (Ticks == 0) or later than this utterance.
97 if (StartTime
.Ticks
== 0 || StartTime
> timestamp
)
98 StartTime
= timestamp
;
// NOTE(review): the statement guarded by this EndTime check is not visible in this
// excerpt -- presumably it raises EndTime to `timestamp`; confirm against the full file.
100 if (EndTime
.Ticks
== 0 || EndTime
< timestamp
)
// The hashtable is used as a set of speaker names; the stored value is irrelevant.
103 speakerHash
[who
] = true;
105 utterances
.Add (utt
);
// Appends `text`, preceded by a newline, to the Text of the most recently added
// utterance. The visible code does this only when the utterance list is non-empty.
108 protected void AppendToPreviousUtterance (string text
)
110 if (utterances
.Count
> 0) {
// The last element of the list is the utterance added most recently.
111 Utterance utt
= (Utterance
) utterances
[utterances
.Count
- 1];
112 utt
.Text
+= "\n" + text
;
116 protected void ClearUtterances ()
121 protected abstract void Load ();
124 ///////////////////////////////////////////////////////////////////////////////
130 public class GaimLog
: ImLog
{
132 public const string MimeType
= "beagle/x-gaim-log";
134 ///////////////////////////////////////
// Describes a Gaim log from the location of its file: StartTime is parsed from the
// file name, and SpeakingTo, Identity and Protocol are taken from the names of the
// file's directory, that directory's parent and its grandparent respectively.
136 public GaimLog (FileInfo file
, TextReader reader
) : base ("gaim", file
, reader
)
138 string filename
= file
.Name
;
140 // Parse what we can from the file path
144 // Character at position 17 will be either a dot, indicating the beginning
145 // of the extension for old gaim logs, or a plus or minus indicating a
146 // timezone offset for new gaim logs.
147 if (filename
[17] == '+' || filename
[17] == '-') {
148 // New gaim 2.0.0 format, including timezone.
150 // Ugly hack time: DateTime's format specifiers only know how to
151 // deal with timezones in the format "+HH:mm" and not "+HHmm",
152 // which is how UNIX traditionally encodes them. I have no idea
153 // why; it would make RFC 822/1123 parsing a hell of a lot easier.
154 // Anyway, in this case, we're going to insert a colon in there so
155 // that DateTime.ParseExact can understand it.
157 // e.g. 2006-02-21.160424-0500EST.html ('.' between date and time, as the format string below requires)
// Characters 0-19 end with the offset sign and hour digits; characters 20-21 are the
// offset minutes. Anything after them (e.g. "EST.html") is dropped.
161 str
= filename
.Substring (0, 20) + ':' + filename
.Substring (20, 2);
162 StartTime
= DateTime
.ParseExact (str
, "yyyy-MM-dd.HHmmsszzz", null);
163 } else if (filename
[17] == '.') {
164 // Older gaim format.
166 // e.g. 2006-02-21.160424.html
168 str
= Path
.GetFileNameWithoutExtension (filename
);
169 StartTime
= DateTime
.ParseExact (str
, "yyyy-MM-dd.HHmmss", null);
171 throw new FormatException ();
// NOTE(review): the warning and DateTime.Now fallback below presumably run when the
// parsing above fails; the enclosing try/catch lines are not visible in this excerpt.
174 Logger
.Log
.Warn ("Could not parse date/time from filename '{0}'", file
.Name
);
175 StartTime
= DateTime
.Now
;
178 // Gaim likes to represent many characters in hex-escaped %xx form
179 SpeakingTo
= StringFu
.HexUnescape (file
.Directory
.Name
);
180 Identity
= StringFu
.HexUnescape (file
.Directory
.Parent
.Name
);
// Unlike the two names above, the protocol directory name is used without unescaping.
182 Protocol
= file
.Directory
.Parent
.Parent
.Name
;
187 // Parses a single log line. A line that does not start with a "(time)" prefix, or
188 // whose prefix cannot be handled, is appended to the previous utterance; otherwise
// the remainder is split into a speaker and a text and recorded through
// AddUtterance. The method returns nothing.
189 private void ProcessLine (string line
)
191 if (line
.Length
== 0)
// Lines without a leading '(' carry no timestamp and continue the previous utterance.
194 if (line
[0] != '(') {
195 AppendToPreviousUtterance (line
);
// `j` is the index of the ')' that closes the time prefix.
198 int j
= line
.IndexOf (')');
200 AppendToPreviousUtterance (line
);
205 // The new version of Gaim adds AM or PM right after the time
207 // 2.0: (19:07:07 AM)
// Text between the parentheses.
209 string when
= line
.Substring (1, j
-1);
213 DateTime time
= DateTime
.Parse (when
);
// The log line carries only a time of day, so the date is taken from StartTime.
215 timestamp
= new DateTime (StartTime
.Year
, StartTime
.Month
, StartTime
.Day
,
216 time
.Hour
, time
.Minute
, time
.Second
);
218 // Try to deal with time wrapping around.
// DateTime is immutable: AddDays returns a new value, so the result has to be
// assigned back. Without the assignment the adjustment is silently lost.
219 if (timestamp
< EndTime
)
220 timestamp
= timestamp
.AddDays (1);
222 // If something goes wrong, this line probably
223 // spills over from the previous one.
224 AppendToPreviousUtterance (line
);
// Skip the closing ')' and the character after it.
228 line
= line
.Substring (j
+2);
// Lines starting with "***": the speaker is the text between the marker and the
// first space, the rest is the message.
234 if (line
.StartsWith ("***")) {
235 i
= line
.IndexOf (' ');
237 alias = line
.Substring (3, i
- 3);
238 text
= line
.Substring (i
+ 1);
240 // FIXME: This will break if there is a ':' in the nickname
241 i
= line
.IndexOf (':');
242 if (i
== -1 || line
.Length
< i
+ 2)
// Speaker is everything before the ':'; the text starts two characters after it.
245 alias = line
.Substring (0, i
);
246 text
= line
.Substring (i
+ 2);
249 AddUtterance (timestamp
, alias, text
);
// Reads the log from TextReader. One line is read up front and used to decide whether
// the log looks like HTML; the remaining lines are read in a loop until ReadLine
// returns null.
254 protected override void Load ()
// Scratch buffer handed to StringFu.StripTags on every call.
259 StringBuilder builder
;
260 builder
= new StringBuilder ();
262 line
= TextReader
.ReadLine (); // throw away first line
266 // Could the second line ever start w/ < in a non-html log?
// NOTE(review): in the visible lines `line` still holds the first line read above,
// although the comment speaks of the second line -- confirm against the full file.
268 bool isHtml
= line
.Length
> 0 && line
[0] == '<';
270 while ((line
= TextReader
.ReadLine ()) != null) {
// NOTE(review): presumably applied only when isHtml is true; the guarding line is
// not visible in this excerpt.
272 line
= StringFu
.StripTags (line
, builder
);
// The visible handler logs a warning that names the file; the exception object
// itself is not included in the message.
276 } catch (Exception e
) {
277 Logger
.Log
.Warn ("Could not parse line in '{0}'", File
.FullName
);
284 ///////////////////////////////////////////////////////////////////////////////
289 public class KopeteLog
: ImLog
{
291 public const string MimeType
= "beagle/x-kopete-log";
// Describes a Kopete log from the location of its file. Protocol comes from a
// directory name ending in "Protocol" (suffix removed, lower-cased), Identity is the
// name of the file's directory, and SpeakingTo is the file name without extension,
// cut at its last '.' or, failing that, at its last '_'.
293 public KopeteLog (FileInfo file
, TextReader reader
) : base ("kopete", file
, reader
)
295 // FIXME: Artificially split logs into conversations depending on the
296 // amount of time elapsed between messages?
298 // Figure out the protocol from the parent.parent or parent foldername
299 if (file
.Directory
.Parent
.Name
.EndsWith ("Protocol"))
// 8 is the length of the "Protocol" suffix being stripped.
300 Protocol
= file
.Directory
.Parent
.Name
.Substring (0, file
.Directory
.Parent
.Name
.Length
- 8).ToLower ();
301 else if (file
.Directory
.Name
.EndsWith ("Protocol"))
302 Protocol
= file
.Directory
.Name
.Substring (0, file
.Directory
.Name
.Length
- 8).ToLower ();
// NOTE(review): presumably the fallback branch when neither directory name ends in
// "Protocol"; the `else` line is not visible in this excerpt.
304 Protocol
= file
.Directory
.Name
;
305 Identity
= file
.Directory
.Name
;
307 // FIXME: This is not safe for all kinds of file/screennames
308 string filename
= Path
.GetFileNameWithoutExtension (file
.Name
);
// "> 0" (rather than ">= 0") means a separator in the very first position is ignored.
309 if (filename
.LastIndexOf ('.') > 0)
310 SpeakingTo
= filename
.Substring (0, filename
.LastIndexOf ('.'));
311 else if (filename
.LastIndexOf ('_') > 0)
312 SpeakingTo
= filename
.Substring (0, filename
.LastIndexOf ('_'));
// NOTE(review): presumably the fallback when the name contains neither separator;
// the `else` line is not visible in this excerpt.
314 SpeakingTo
= filename
;
315 Logger
.Log
.Debug ("Speakingto for " + file
.Name
+ " is " + SpeakingTo
+ ", protocol is " + Protocol
);
319 private const string date_format
= "yyyy M d H:m:s";
321 protected override void Load ()
326 DateTime base_date
= DateTime
.MinValue
;
329 reader
= new XmlTextReader (File
.Open(
333 } catch (Exception e
) {
334 Console
.WriteLine ("Could not open '{0}'", File
.FullName
);
335 Console
.WriteLine (e
);
339 while (reader
.Read ()) {
340 if (reader
.NodeType
!= XmlNodeType
.Element
)
343 switch (reader
.Name
) {
345 base_date
= new DateTime (Convert
.ToInt32 (reader
.GetAttribute ("year")),
346 Convert
.ToInt32 (reader
.GetAttribute ("month")),
351 // Parse the timestamp of the message
352 string timestamp
= String
.Format ("{0} {1} {2}",
355 reader
.GetAttribute ("time"));
356 int time_separator_count
= 0;
357 foreach (int the_char
in timestamp
)
359 time_separator_count
++;
360 if (time_separator_count
< 2)
361 timestamp
= timestamp
+ ":00";
363 DateTime msg_date
= DateTime
.MinValue
;
366 msg_date
= DateTime
.ParseExact (timestamp
,
370 Logger
.Log
.Error ("Couldn't parse Kopete timestamp: {0}", timestamp
);
374 string who
= reader
.GetAttribute ("nick");
375 if (who
== null || who
== "")
376 who
= reader
.GetAttribute ("from");
377 if (who
== null || who
== "")
380 // Advance to the text node for the actual message
383 AddUtterance (msg_date
, who
, reader
.Value
);