* tools/beagle-crawl-system.in: Use MONO_SHARED_DIR to point to a
[beagle.git] / Util / ImLog.cs
bloba8137333e507b0744bd7335181d08bda145746c4
1 //
2 // ImLog.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Globalization;
31 using System.Text;
32 using System.Text.RegularExpressions;
33 using System.Xml;
34 using Mono.Unix.Native;
36 namespace Beagle.Util {
// Identifies which instant-messaging client produced a log.
// The numeric values are spelled out; they match the values the
// compiler assigns implicitly to an enum declared in this order.
public enum ImClient {
	Gaim   = 0,
	Kopete = 1
}
// Base class for a parsed instant-messenger conversation log.
//
// The base class owns the list of utterances, the set of speaker names
// and the StartTime/EndTime window spanned by the utterances added so
// far.  Subclasses implement Load () and feed the conversation in
// through AddUtterance () and AppendToPreviousUtterance ().
public abstract class ImLog {

	public delegate void Sink (ImLog imLog);

	// Set by the constructor from its arguments.
	public string Client;
	public FileInfo File;
	public TextReader TextReader;

	// Not assigned in this class; the subclasses in this file set them.
	public string Protocol;

	// Earliest / latest timestamp handed to AddUtterance ().  A value
	// with zero ticks is treated as "not set yet".
	public DateTime StartTime;
	public DateTime EndTime;

	public string SpeakingTo;
	public string Identity;

	// Keys are the "who" strings passed to AddUtterance (); the values
	// are unused placeholders.
	private Hashtable speaker_set = new Hashtable ();

	// One message in the conversation.
	public class Utterance {
		// The timestamp is held as a long and converted on every access
		// through Mono.Unix.Native.NativeConvert.
		// NOTE(review): presumably seconds since the Unix epoch, which
		// would drop sub-second precision -- confirm against NativeConvert.
		private long native_time;

		public DateTime Timestamp {
			get { return NativeConvert.ToDateTime (native_time); }
			set { native_time = NativeConvert.FromDateTime (value); }
		}

		public String Who;
		public String Text;
	}

	// Utterance objects, in the order they were added.
	private ArrayList utterance_list = new ArrayList ();

	//////////////////////////

	// Records the client name, the reader and the file.  Nothing is
	// parsed here.
	protected ImLog (string client, FileInfo file, TextReader reader)
	{
		Client = client;
		TextReader = reader;
		File = file;
	}

	// Names of everyone who has been passed as "who" to AddUtterance ().
	public ICollection Speakers {
		get { return speaker_set.Keys; }
	}

	// The live list of Utterance objects (not a copy).
	public IList Utterances {
		get { return utterance_list; }
	}

	// Appends a new utterance (text is trimmed), widens the
	// StartTime/EndTime window to include its timestamp, and records
	// the speaker.
	protected void AddUtterance (DateTime timestamp, string who, string text)
	{
		Utterance entry = new Utterance ();
		entry.Timestamp = timestamp;
		entry.Who = who;
		entry.Text = text.Trim ();

		bool start_unset = (StartTime.Ticks == 0);
		if (start_unset || timestamp < StartTime)
			StartTime = timestamp;

		bool end_unset = (EndTime.Ticks == 0);
		if (end_unset || timestamp > EndTime)
			EndTime = timestamp;

		speaker_set [who] = true;

		utterance_list.Add (entry);
	}

	// Adds the text, preceded by a newline, to the most recently added
	// utterance.  Does nothing when there are no utterances yet.
	protected void AppendToPreviousUtterance (string text)
	{
		int count = utterance_list.Count;
		if (count <= 0)
			return;

		Utterance last = (Utterance) utterance_list [count - 1];
		last.Text = last.Text + "\n" + text;
	}

	// Drops every stored utterance.  The speaker set and the
	// StartTime/EndTime window are left as they are.
	protected void ClearUtterances ()
	{
		utterance_list.Clear ();
	}

	// Implemented by each subclass to parse its log format.
	protected abstract void Load ();
}
124 ///////////////////////////////////////////////////////////////////////////////
127 // Gaim Logs
// Parser for Gaim conversation logs (plain text or HTML).
//
// The start time is taken from the file name; SpeakingTo, Identity and
// Protocol are taken from the three enclosing directory names.  The
// whole log is loaded from the supplied TextReader in the constructor.
public class GaimLog : ImLog {

	public const string MimeType = "beagle/x-gaim-log";

	// Removes every "<...>" span from the line and returns the rest.
	//
	// A line without any '<' is returned as-is and the builder is not
	// touched.  Otherwise the builder is reset and used as scratch
	// space.  A '<' that has no '>' after it is kept, together with the
	// remainder of the line.
	private static string StripTags (string line, StringBuilder builder)
	{
		int first = line.IndexOf ('<');
		if (first == -1)
			return line;

		builder.Length = 0;

		int i = 0;
		while (i < line.Length) {

			// Reuse the position found above for the first pass
			// instead of searching again.
			int j;
			if (first == -1) {
				j = line.IndexOf ('<', i);
			} else {
				j = first;
				first = -1;
			}

			int k = -1;
			if (j != -1) {
				k = line.IndexOf ('>', j);

				// If a "<" is unmatched, preserve it, and the
				// rest of the line
				if (k == -1)
					j = -1;
			}

			if (j == -1) {
				builder.Append (line, i, line.Length - i);
				break;
			}

			// Keep the text before the tag and resume after its '>'.
			builder.Append (line, i, j - i);

			i = k + 1;
		}

		return builder.ToString ();
	}

	///////////////////////////////////////

	// Builds the log from a file and a reader over its contents.
	//
	// If the start time cannot be parsed from the file name, a warning
	// is logged and the current time is used instead.
	// NOTE(review): the file is assumed to sit at least three
	// directories deep (protocol/identity/buddy/file); a shallower path
	// makes one of the Directory.Parent lookups below null -- confirm
	// callers guarantee this.
	public GaimLog (FileInfo file, TextReader reader) : base ("gaim", file, reader)
	{
		string filename = file.Name;

		// Parse what we can from the file path
		try {
			string str;

			// Character at position 17 will be either a dot, indicating the beginning
			// of the extension for old gaim logs, or a plus or minus indicating a
			// timezone offset for new gaim logs.  (A name shorter than 18 characters
			// throws here and lands in the catch below.)
			if (filename [17] == '+' || filename [17] == '-') {
				// New gaim 2.0.0 format, including timezone.

				// Ugly hack time: DateTime's format specifiers only know how to
				// deal with timezones in the format "+HH:mm" and not "+HHmm",
				// which is how UNIX traditionally encodes them.  So we insert
				// a colon so that DateTime.ParseExact can understand it.
				//
				// 2006-02-21.160424-0500EST.html
				//                     ^
				//                     offset 20

				str = filename.Substring (0, 20) + ':' + filename.Substring (20, 2);

				// The file name is machine-generated, so parse it with the
				// invariant culture rather than the user's calendar.
				StartTime = DateTime.ParseExact (str, "yyyy-MM-dd.HHmmsszzz",
								 CultureInfo.InvariantCulture);
			} else if (filename [17] == '.') {
				// Older gaim format.
				//
				// 2006-02-21.160424.html

				str = Path.GetFileNameWithoutExtension (filename);
				StartTime = DateTime.ParseExact (str, "yyyy-MM-dd.HHmmss",
								 CultureInfo.InvariantCulture);
			} else {
				throw new FormatException ();
			}
		} catch (Exception) {
			Logger.Log.Warn ("Could not parse date/time from filename '{0}'", file.Name);
			StartTime = DateTime.Now;
		}

		// Gaim likes to represent many characters in hex-escaped %xx form
		SpeakingTo = StringFu.HexUnescape (file.Directory.Name);
		Identity = StringFu.HexUnescape (file.Directory.Parent.Name);

		Protocol = file.Directory.Parent.Parent.Name;

		Load ();
	}

	// Handles one (already tag-stripped) line of the log.
	//
	// A line of the form "(time) alias: text" becomes a new utterance.
	// Anything that does not start with a parseable "(time)" is treated
	// as a continuation and appended to the previous utterance.
	private void ProcessLine (string line)
	{
		if (line.Length == 0)
			return;

		if (line [0] != '(') {
			AppendToPreviousUtterance (line);
			return;
		}

		int j = line.IndexOf (')');
		if (j == -1) {
			AppendToPreviousUtterance (line);
			return;
		}

		// Gaim 2.0
		// The new version of Gaim adds AM or PM right after the time
		// 1.x: (19:07:07)
		// 2.0: (19:07:07 AM)

		string when = line.Substring (1, j - 1);
		DateTime timestamp;

		try {
			DateTime time = DateTime.Parse (when);

			// Only the time of day is taken from the line; the date
			// comes from the log's start time.
			timestamp = new DateTime (StartTime.Year, StartTime.Month, StartTime.Day,
						  time.Hour, time.Minute, time.Second);

			// Try to deal with time wrapping around.  DateTime is
			// immutable, so the result of AddDays must be assigned back.
			if (timestamp < EndTime)
				timestamp = timestamp.AddDays (1);
		} catch {
			// If something goes wrong, this line probably
			// spills over from the previous one.
			AppendToPreviousUtterance (line);
			return;
		}

		// Skip the ")" and the separator character that follows it.
		// A line that ends right at the ")" carries no alias or text.
		if (j + 2 > line.Length)
			return;
		line = line.Substring (j + 2);

		// Extract the alias
		// FIXME: This will break if there is a ':' in the nickname
		int i = line.IndexOf (':');
		if (i == -1)
			return;

		string alias = line.Substring (0, i);

		// Skip the ":" and the separator after it; a line that ends at
		// the ":" is an utterance with empty text.
		string text = (i + 2 <= line.Length) ? line.Substring (i + 2) : "";

		AddUtterance (timestamp, alias, text);
	}

	// Reads the whole log from TextReader, replacing any utterances
	// loaded earlier.  The reader is not closed here.
	protected override void Load ()
	{
		ClearUtterances ();

		StringBuilder builder = new StringBuilder ();

		string line = TextReader.ReadLine (); // throw away first line
		if (line == null)
			return;

		// The log is treated as HTML when its first line starts with '<'.
		// Could the first line ever start w/ < in a non-html log?
		// I hope not!
		bool isHtml = line.Length > 0 && line [0] == '<';

		while ((line = TextReader.ReadLine ()) != null) {
			if (isHtml)
				line = StripTags (line, builder);

			ProcessLine (line);
		}
	}
}
310 ///////////////////////////////////////////////////////////////////////////////
313 // Kopete Logs
// Parser for Kopete XML history files.
//
// Protocol and Identity come from the enclosing directory names and
// SpeakingTo from the file name.  The messages are read from the file
// itself (File), not from the TextReader handed to the constructor.
public class KopeteLog : ImLog {

	public const string MimeType = "beagle/x-kopete-log";

	// Number of trailing characters dropped from the protocol directory
	// name.  NOTE(review): presumably the length of a "Protocol" suffix
	// (e.g. "MSNProtocol") -- confirm against Kopete's directory layout.
	private const int protocol_suffix_length = 8;

	// Layout of the string built in Load (): year and month from the
	// enclosing <date> element followed by the msg "time" attribute,
	// which therefore has to supply "d H:m:s".
	private const string date_format = "yyyy M d H:m:s";

	public KopeteLog (FileInfo file, TextReader reader) : base ("kopete", file, reader)
	{
		// FIXME: Artificially split logs into conversations depending on the
		// amount of time elapsed between messages?

		// Figure out the protocol from the name of the directory above the
		// one holding the file.  Names too short to carry the suffix are
		// used whole instead of throwing.
		string protocol_dir = file.Directory.Parent.Name;
		if (protocol_dir.Length >= protocol_suffix_length)
			protocol_dir = protocol_dir.Substring (0, protocol_dir.Length - protocol_suffix_length);

		// Lower-case with the invariant culture so the result does not
		// depend on the user's locale.
		Protocol = protocol_dir.ToLower (CultureInfo.InvariantCulture);
		Identity = file.Directory.Name;

		// FIXME: This is not safe for all kinds of file/screennames
		// The name without extension is cut at its last dot; a name with
		// no dot left is used whole.
		string filename = Path.GetFileNameWithoutExtension (file.Name);
		int dot = filename.LastIndexOf ('.');
		SpeakingTo = (dot == -1) ? filename : filename.Substring (0, dot);

		Load ();
	}

	// Re-reads the XML file and rebuilds the utterance list.
	//
	// If the file cannot be opened, a warning is logged and the log is
	// left empty.  Messages with an unparseable timestamp or without a
	// "nick"/"from" attribute are skipped.  The reader is closed even if
	// parsing throws.
	protected override void Load ()
	{
		ClearUtterances ();

		XmlReader reader;

		try {
			reader = new XmlTextReader (File.Open (FileMode.Open,
							       FileAccess.Read,
							       FileShare.Read));
		} catch (Exception e) {
			Logger.Log.Warn ("Could not open '{0}'", File.FullName);
			Logger.Log.Warn ("{0}", e);
			return;
		}

		DateTime base_date = DateTime.MinValue;

		try {
			while (reader.Read ()) {
				if (reader.NodeType != XmlNodeType.Element)
					continue;

				switch (reader.Name) {
				case "date":
					// Only Year and Month of base_date are read below.
					// NOTE(review): the day argument was not visible in
					// the reviewed copy of this file; 1 is valid for
					// every month -- confirm against the original.
					base_date = new DateTime (Convert.ToInt32 (reader.GetAttribute ("year")),
								  Convert.ToInt32 (reader.GetAttribute ("month")),
								  1);
					break;

				case "msg":
					// Parse the timestamp of the message
					string timestamp = String.Format ("{0} {1} {2}",
									  base_date.Year,
									  base_date.Month,
									  reader.GetAttribute ("time"));

					DateTime msg_date = DateTime.MinValue;

					try {
						msg_date = DateTime.ParseExact (timestamp,
										date_format,
										CultureInfo.InvariantCulture);
					} catch (Exception) {
						Logger.Log.Error ("Couldn't parse Kopete timestamp: {0}", timestamp);
						break;
					}

					// Prefer the nickname, fall back to the sender id.
					string who = reader.GetAttribute ("nick");
					if (who == null || who == "")
						who = reader.GetAttribute ("from");
					if (who == null || who == "")
						break;

					// Advance to the text node for the actual message.
					// A self-closing <msg/> has no content; reading on
					// would swallow whatever node follows it.
					string text = "";
					if (! reader.IsEmptyElement) {
						reader.Read ();
						text = reader.Value;
					}

					AddUtterance (msg_date, who, text);
					break;
				}
			}
		} finally {
			reader.Close ();
		}
	}
}