Add --enable-deletion option to buildindex. If used, buildindex will remove deleted...
[beagle.git] / Util / ImLog.cs
blob93e754ff177de9b17d5058c47856037fd48d2262
1 //
2 // ImLog.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Globalization;
31 using System.Text;
32 using System.Text.RegularExpressions;
33 using System.Xml;
34 using Mono.Unix.Native;
36 namespace Beagle.Util {
/// <summary>
/// Instant-messaging clients named by this file.
/// </summary>
public enum ImClient {
	Gaim,
	Kopete,
}
/// <summary>
/// Base class for an instant-messaging conversation log: metadata about
/// the conversation plus the ordered list of utterances read from it.
/// Subclasses implement <see cref="Load"/> to fill the list.
/// </summary>
public abstract class ImLog {

	public delegate void Sink (ImLog imLog);

	// Source of the log.
	public string Client;
	public FileInfo File;
	public TextReader TextReader;
	public string Protocol;

	// Earliest and latest utterance timestamps seen so far; a value
	// with zero ticks means "not set yet" (see AddUtterance).
	public DateTime StartTime;
	public DateTime EndTime;

	public string SpeakingTo;
	public string Identity;

	// Used as a set: the keys are the names passed to AddUtterance.
	private Hashtable speakers = new Hashtable ();

	/// <summary>
	/// A single message: when it was said, by whom, and its text.
	/// </summary>
	public class Utterance {
		// The timestamp is kept in the long form produced by
		// NativeConvert.FromDateTime and converted back on read.
		private long rawTimestamp;

		public DateTime Timestamp {
			get { return NativeConvert.ToDateTime (rawTimestamp); }
			set { rawTimestamp = NativeConvert.FromDateTime (value); }
		}

		public String Who;
		public String Text;
	}

	private ArrayList utteranceList = new ArrayList ();

	//////////////////////////

	/// <summary>
	/// Records the client name, the log file and the reader the
	/// log's content is read from.
	/// </summary>
	protected ImLog (string client, FileInfo file, TextReader reader)
	{
		Client = client;
		TextReader = reader;
		File = file;
	}

	/// <summary>Names of everyone who has an utterance in this log.</summary>
	public ICollection Speakers {
		get { return speakers.Keys; }
	}

	/// <summary>The utterances in the order they were added.</summary>
	public IList Utterances {
		get { return utteranceList; }
	}

	/// <summary>
	/// Appends a new utterance (with its text trimmed), widens
	/// StartTime / EndTime to include its timestamp, and registers
	/// the speaker.
	/// </summary>
	protected void AddUtterance (DateTime timestamp, string who, string text)
	{
		Utterance entry = new Utterance ();
		entry.Timestamp = timestamp;
		entry.Who = who;
		entry.Text = text.Trim ();

		bool startUnset = (StartTime.Ticks == 0);
		if (startUnset || timestamp < StartTime)
			StartTime = timestamp;

		bool endUnset = (EndTime.Ticks == 0);
		if (endUnset || timestamp > EndTime)
			EndTime = timestamp;

		speakers [who] = true;

		utteranceList.Add (entry);
	}

	/// <summary>
	/// Adds text as a new line at the end of the most recently added
	/// utterance. Does nothing when there are no utterances yet.
	/// </summary>
	protected void AppendToPreviousUtterance (string text)
	{
		if (utteranceList.Count == 0)
			return;

		int lastIndex = utteranceList.Count - 1;
		Utterance previous = (Utterance) utteranceList [lastIndex];
		previous.Text = previous.Text + "\n" + text;
	}

	/// <summary>Removes all utterances collected so far.</summary>
	protected void ClearUtterances ()
	{
		utteranceList.Clear ();
	}

	/// <summary>Reads the log and populates the utterance list.</summary>
	protected abstract void Load ();
}
124 ///////////////////////////////////////////////////////////////////////////////
127 // Gaim Logs
/// <summary>
/// An <see cref="ImLog"/> read from a Gaim conversation log file.
/// </summary>
public class GaimLog : ImLog {

	public const string MimeType = "beagle/x-gaim-log";

	///////////////////////////////////////

	/// <summary>
	/// Derives StartTime from the file name, and SpeakingTo, Identity
	/// and Protocol from the three directories enclosing the file,
	/// then loads the utterances from <paramref name="reader"/>.
	/// If the file name cannot be parsed, a warning is logged and
	/// StartTime falls back to the current time.
	/// </summary>
	public GaimLog (FileInfo file, TextReader reader) : base ("gaim", file, reader)
	{
		string filename = file.Name;

		// Parse what we can from the file path
		try {
			string str;

			// Character at position 17 will be either a dot, indicating the beginning
			// of the extension for old gaim logs, or a plus or minus indicating a
			// timezone offset for new gaim logs.  (A name shorter than 18
			// characters throws here and is handled by the catch below.)
			if (filename [17] == '+' || filename [17] == '-') {
				// New gaim 2.0.0 format, including timezone.

				// Ugly hack time: DateTime's format specifiers only know how to
				// deal with timezones in the format "+HH:mm" and not "+HHmm",
				// which is how UNIX traditionally encodes them.  I have no idea
				// why; it would make RFC 822/1123 parsing a hell of a lot easier.
				// Anyway, in this case, we're going to insert a colon in there so
				// that DateTime.ParseExact can understand it.
				//
				// 2006-02-21.160424-0500EST.html
				//                     ^
				//                     offset 20

				str = filename.Substring (0, 20) + ':' + filename.Substring (20, 2);
				StartTime = DateTime.ParseExact (str, "yyyy-MM-dd.HHmmsszzz", null);
			} else if (filename [17] == '.') {
				// Older gaim format.
				//
				// 2006-02-21.160424.html

				str = Path.GetFileNameWithoutExtension (filename);
				StartTime = DateTime.ParseExact (str, "yyyy-MM-dd.HHmmss", null);
			} else {
				throw new FormatException ();
			}
		} catch (Exception) {
			Logger.Log.Warn ("Could not parse date/time from filename '{0}'", file.Name);
			StartTime = DateTime.Now;
		}

		// Gaim likes to represent many characters in hex-escaped %xx form
		SpeakingTo = StringFu.HexUnescape (file.Directory.Name);
		Identity = StringFu.HexUnescape (file.Directory.Parent.Name);

		Protocol = file.Directory.Parent.Parent.Name;

		Load ();
	}

	/// <summary>
	/// Handles one line of the log.  A line of the form
	/// "(time) alias: text" becomes a new utterance; a line that does
	/// not start with a parseable "(time)" is appended to the previous
	/// utterance; a timestamped line without an "alias:" part is dropped.
	/// </summary>
	private void ProcessLine (string line)
	{
		if (line.Length == 0)
			return;

		if (line [0] != '(') {
			AppendToPreviousUtterance (line);
			return;
		}

		int j = line.IndexOf (')');
		if (j == -1) {
			AppendToPreviousUtterance (line);
			return;
		}

		// Gaim 2.0
		// The new version of Gaim adds AM or PM right after the time
		// 1.x: (19:07:07)
		// 2.0: (19:07:07 AM)

		string when = line.Substring (1, j-1);
		DateTime timestamp;

		try {
			DateTime time = DateTime.Parse (when);

			// Only the time of day comes from the line; the date is
			// taken from StartTime.
			timestamp = new DateTime (StartTime.Year, StartTime.Month, StartTime.Day,
						  time.Hour, time.Minute, time.Second);

			// Try to deal with time wrapping around.  DateTime is
			// immutable, so the result of AddDays must be assigned.
			if (timestamp < EndTime)
				timestamp = timestamp.AddDays (1);
		} catch {
			// If something goes wrong, this line probably
			// spills over from the previous one.
			AppendToPreviousUtterance (line);
			return;
		}

		// Skip the ')' and the character after it.  If the line ends
		// right at the ')', there is no alias or text to extract.
		if (j + 2 > line.Length)
			return;
		line = line.Substring (j+2);

		// Extract the alias
		// FIXME: This will break if there is a ':' in the nickname
		int i = line.IndexOf (':');
		if (i == -1)
			return;
		string alias = line.Substring (0, i);

		// Skip the ':' and the character after it; a line that ends at
		// the ':' yields an empty message instead of throwing.
		string text = (i + 2 <= line.Length) ? line.Substring (i+2) : "";

		AddUtterance (timestamp, alias, text);
	}

	/// <summary>
	/// Re-reads the utterances from TextReader.  The first line is
	/// used only to detect HTML logs and is otherwise discarded; for
	/// HTML logs every following line has its tags stripped before
	/// being processed.
	/// </summary>
	protected override void Load ()
	{
		ClearUtterances ();
		StringBuilder builder = new StringBuilder ();

		string line = TextReader.ReadLine (); // throw away first line
		if (line == null)
			return;

		// Could the first line ever start w/ < in a non-html log?
		// I hope not!
		bool isHtml = line.Length > 0 && line [0] == '<';

		while ((line = TextReader.ReadLine ()) != null) {
			if (isHtml)
				line = StringFu.StripTags (line, builder);

			ProcessLine (line);
		}
	}
}
268 ///////////////////////////////////////////////////////////////////////////////
271 // Kopete Logs
/// <summary>
/// An <see cref="ImLog"/> read from a Kopete XML history file.
/// </summary>
public class KopeteLog : ImLog {

	public const string MimeType = "beagle/x-kopete-log";

	/// <summary>
	/// Derives Protocol and Identity from the directories enclosing
	/// the file and SpeakingTo from the file name, then loads the
	/// utterances by opening <paramref name="file"/> itself.
	/// </summary>
	public KopeteLog (FileInfo file, TextReader reader) : base ("kopete", file, reader)
	{
		// FIXME: Artificially split logs into conversations depending on the
		// amount of time elapsed between messages?

		// Figure out the protocol from the parent.parent foldername by
		// dropping its last 8 characters (presumably a "Protocol"
		// suffix -- confirm).  Names too short to carry such a suffix
		// are used unchanged rather than throwing.
		string protocol_dir = file.Directory.Parent.Name;
		if (protocol_dir.Length >= 8)
			protocol_dir = protocol_dir.Substring (0, protocol_dir.Length - 8);
		Protocol = protocol_dir.ToLower ();
		Identity = file.Directory.Name;

		// FIXME: This is not safe for all kinds of file/screennames
		// Strip everything from the last '.' of the extension-less
		// name; if there is no '.', keep the whole name.
		string filename = Path.GetFileNameWithoutExtension (file.Name);
		int last_dot = filename.LastIndexOf ('.');
		SpeakingTo = (last_dot >= 0) ? filename.Substring (0, last_dot) : filename;

		Load ();
	}

	// Layout of "<year> <month> <time attribute>" as assembled in Load.
	private const string date_format = "yyyy M d H:m:s";

	/// <summary>
	/// Re-reads the utterances from the XML file.  "date" elements
	/// supply the year and month; each "msg" element supplies the
	/// rest of the timestamp in its "time" attribute, the speaker in
	/// "nick" (or "from"), and the message as its content.
	/// Failure to open the file is logged and leaves the log empty.
	/// </summary>
	protected override void Load ()
	{
		ClearUtterances ();

		XmlReader reader;
		DateTime base_date = DateTime.MinValue;

		try {
			reader = new XmlTextReader (File.Open(
							   FileMode.Open,
							   FileAccess.Read,
							   FileShare.Read));
		} catch (Exception e) {
			Logger.Log.Warn ("Could not open '{0}'", File.FullName);
			Logger.Log.Warn ("{0}", e.ToString ());
			return;
		}

		// Make sure the reader (and the file stream under it) is
		// closed even if parsing throws part-way through.
		try {
			while (reader.Read ()) {
				if (reader.NodeType != XmlNodeType.Element)
					continue;

				switch (reader.Name) {
				case "date":
					// Only Year and Month of base_date are used
					// below, so the day is fixed at 1.
					base_date = new DateTime (Convert.ToInt32 (reader.GetAttribute ("year")),
								  Convert.ToInt32 (reader.GetAttribute ("month")),
								  1);
					break;

				case "msg":
					// Parse the timestamp of the message
					string timestamp = String.Format ("{0} {1} {2}",
									  base_date.Year,
									  base_date.Month,
									  reader.GetAttribute ("time"));

					DateTime msg_date = DateTime.MinValue;

					try {
						msg_date = DateTime.ParseExact (timestamp,
										date_format,
										null);
					} catch (Exception) {
						Logger.Log.Error ("Couldn't parse Kopete timestamp: {0}", timestamp);
						break;
					}

					// Prefer the nickname, fall back to the sender
					// id, and skip messages that have neither.
					string who = reader.GetAttribute ("nick");
					if (who == null || who == "")
						who = reader.GetAttribute ("from");
					if (who == null || who == "")
						break;

					// Advance to the text node for the actual message
					reader.Read ();

					AddUtterance (msg_date, who, reader.Value);
					break;
				}
			}
		} finally {
			reader.Close ();
		}
	}
}