Fixed #374055: Only the first "tag" is detected in digikam.
[beagle.git] / Util / ImLog.cs
blobcfc35feaa8053b3a58d0218cc487c8a985748bd6
1 //
2 // ImLog.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Globalization;
31 using System.Text;
32 using System.Text.RegularExpressions;
33 using System.Xml;
34 using Mono.Unix.Native;
36 namespace Beagle.Util {
// Identifies the instant-messaging client that produced a log.
// One member per client that has a log parser in this file.
public enum ImClient {
	Gaim,
	Kopete
}
// Base class for one parsed instant-messaging conversation log.
//
// A subclass implements Load () and feeds what it parses into
// AddUtterance () / AppendToPreviousUtterance (); this class keeps the
// resulting utterance list, the set of speakers, and the time span
// covered by the utterances.
public abstract class ImLog {

	// Callback type that is handed an ImLog.
	public delegate void Sink (ImLog imLog);

	// The three fields below are assigned by the constructor.
	public string Client;
	public FileInfo File;
	public TextReader TextReader;

	// Nothing in this class assigns Protocol, SpeakingTo or Identity;
	// they are left for subclasses to fill in.
	public string Protocol;

	// Earliest and latest timestamps handed to AddUtterance ().  A value
	// whose Ticks is 0 is treated as "not set yet".
	public DateTime StartTime;
	public DateTime EndTime;

	public string SpeakingTo;
	public string Identity;

	// Used as a set: the keys are the speaker names, the values are
	// always 'true'.
	private Hashtable speakerHash = new Hashtable ();

	// A single message in the conversation.
	public class Utterance {
		// The timestamp is kept as the long that NativeConvert
		// produces and is converted back on every read.
		private long timestamp;

		public DateTime Timestamp {
			get { return NativeConvert.ToDateTime (timestamp); }
			set { timestamp = NativeConvert.FromDateTime (value); }
		}

		public String Who;
		public String Text;
	}

	// Utterance objects in the order they were added.
	private ArrayList utterances = new ArrayList ();

	//////////////////////////

	// Records the client name, the log file and the reader.  Does not
	// call Load (); the subclass decides when to do that.
	protected ImLog (string client, FileInfo file, TextReader reader)
	{
		File = file;
		TextReader = reader;
		Client = client;
	}

	// Names of everybody who has at least one utterance.
	public ICollection Speakers {
		get { return speakerHash.Keys; }
	}

	// The live utterance list (not a copy).
	public IList Utterances {
		get { return utterances; }
	}

	// Appends a new utterance with the given speaker and (trimmed)
	// text, widens StartTime/EndTime so that they include 'timestamp',
	// and registers 'who' as a speaker.
	protected void AddUtterance (DateTime timestamp, string who, string text)
	{
		Utterance entry = new Utterance ();
		entry.Timestamp = timestamp;
		entry.Who = who;
		entry.Text = text.Trim ();

		bool start_unset = (StartTime.Ticks == 0);
		if (start_unset || timestamp < StartTime)
			StartTime = timestamp;

		bool end_unset = (EndTime.Ticks == 0);
		if (end_unset || timestamp > EndTime)
			EndTime = timestamp;

		speakerHash [who] = true;

		utterances.Add (entry);
	}

	// Adds 'text' as a new line at the end of the most recent
	// utterance.  Does nothing when there are no utterances yet.
	protected void AppendToPreviousUtterance (string text)
	{
		int last = utterances.Count - 1;
		if (last < 0)
			return;

		Utterance previous = (Utterance) utterances [last];
		previous.Text = previous.Text + "\n" + text;
	}

	// Drops every stored utterance.  The speaker set and
	// StartTime/EndTime are left as they are.
	protected void ClearUtterances ()
	{
		utterances.Clear ();
	}

	// Implemented by each log format to parse the log into utterances.
	protected abstract void Load ();
}
124 ///////////////////////////////////////////////////////////////////////////////
127 // Gaim Logs
// Parser for Gaim conversation logs.
//
// The conversation start time is taken from the log's file name, and the
// protocol, local identity and remote party from the three directory
// levels above the file.  The body is read line by line from the
// TextReader handed to the constructor.
public class GaimLog : ImLog {

	public const string MimeType = "beagle/x-gaim-log";

	///////////////////////////////////////

	// Fills in StartTime, SpeakingTo, Identity and Protocol from the
	// file's path and then parses the log body via Load ().
	public GaimLog (FileInfo file, TextReader reader) : base ("gaim", file, reader)
	{
		string filename = file.Name;

		// Parse what we can from the file path
		try {
			string str;

			// Character at position 17 will be either a dot, indicating the beginning
			// of the extension for old gaim logs, or a plus or minus indicating a
			// timezone offset for new gaim logs.
			//
			// NOTE(review): the example names below show '-' between the date and
			// the time, while both format strings require '.' at that position.
			// Confirm against real log file names.
			if (filename [17] == '+' || filename [17] == '-') {
				// New gaim 2.0.0 format, including timezone.

				// Ugly hack time: DateTime's format specifiers only know how to
				// deal with timezones in the format "+HH:mm" and not "+HHmm",
				// which is how UNIX traditionally encodes them.  I have no idea
				// why; it would make RFC 822/1123 parsing a hell of a lot easier.
				// Anyway, in this case, we're going to insert a colon in there so
				// that DateTime.ParseExact can understand it.
				//
				// 2006-02-21-160424-0500EST.html
				//                     ^
				//                     offset 20

				str = filename.Substring (0, 20) + ':' + filename.Substring (20, 2);

				// The file name is machine-generated, so parse it with the
				// invariant culture instead of whatever calendar the current
				// culture happens to use.
				StartTime = DateTime.ParseExact (str, "yyyy-MM-dd.HHmmsszzz", CultureInfo.InvariantCulture);
			} else if (filename [17] == '.') {
				// Older gaim format.
				//
				// 2006-02-21-160424.html

				str = Path.GetFileNameWithoutExtension (filename);
				StartTime = DateTime.ParseExact (str, "yyyy-MM-dd.HHmmss", CultureInfo.InvariantCulture);
			} else {
				throw new FormatException ();
			}
		} catch {
			// Also reached when the name is shorter than 18 characters.
			Logger.Log.Warn ("Could not parse date/time from filename '{0}'", file.Name);
			StartTime = DateTime.Now;
		}

		// Gaim likes to represent many characters in hex-escaped %xx form
		SpeakingTo = StringFu.HexUnescape (file.Directory.Name);
		Identity = StringFu.HexUnescape (file.Directory.Parent.Name);

		Protocol = file.Directory.Parent.Parent.Name;

		Load ();
	}

	// Handles one line of the log.  A line of the form
	// "(time) alias: text" or "(time) ***alias text" becomes a new
	// utterance; a line that does not start with a parsable "(time)"
	// prefix is appended to the previous utterance.  Empty lines, and
	// timestamped lines with no "alias: " part, are ignored.
	//
	// May throw on a malformed line (for example one with nothing
	// after the closing parenthesis); Load () catches and logs that.
	private void ProcessLine (string line)
	{
		if (line.Length == 0)
			return;

		if (line [0] != '(') {
			AppendToPreviousUtterance (line);
			return;
		}

		int j = line.IndexOf (')');
		if (j == -1) {
			AppendToPreviousUtterance (line);
			return;
		}

		// Gaim 2.0
		// The new version of Gaim adds AM or PM right after the time
		// 1.x: (19:07:07)
		// 2.0: (19:07:07 AM)

		string when = line.Substring (1, j-1);
		DateTime timestamp;

		try {
			DateTime time = DateTime.Parse (when);

			// Only the time of day comes from the line; the date is
			// borrowed from StartTime.
			timestamp = new DateTime (StartTime.Year, StartTime.Month, StartTime.Day,
						  time.Hour, time.Minute, time.Second);

			// Try to deal with time wrapping around.  DateTime is
			// immutable, so the result of AddDays has to be assigned
			// back for the correction to take effect.
			if (timestamp < EndTime)
				timestamp = timestamp.AddDays (1);
		} catch {
			// If something goes wrong, this line probably
			// spills over from the previous one.
			AppendToPreviousUtterance (line);
			return;
		}

		// Skip the ')' and the character that follows it.
		line = line.Substring (j+2);

		// Extract the alias
		string alias, text;
		int i;

		if (line.StartsWith ("***")) {
			i = line.IndexOf (' ');

			alias = line.Substring (3, i - 3);
			text = line.Substring (i + 1);
		} else {
			// FIXME: This will break if there is a ':' in the nickname
			i = line.IndexOf (':');
			if (i == -1 || line.Length < i + 2)
				return;

			alias = line.Substring (0, i);
			text = line.Substring (i + 2);
		}

		AddUtterance (timestamp, alias, text);
	}

	// Re-reads the whole log from TextReader.  The first line is only
	// inspected to decide whether the log is HTML (it starts with '<')
	// and is otherwise discarded; in HTML logs the tags are stripped
	// from every following line before it is processed.  A line that
	// fails to parse is logged and skipped.
	protected override void Load ()
	{
		string line;

		ClearUtterances ();
		StringBuilder builder;
		builder = new StringBuilder ();

		line = TextReader.ReadLine (); // throw away first line
		if (line == null)
			return;

		// Could the first line ever start w/ < in a non-html log?
		// I hope not!
		bool isHtml = line.Length > 0 && line [0] == '<';

		while ((line = TextReader.ReadLine ()) != null) {
			if (isHtml)
				line = StringFu.StripTags (line, builder);

			try {
				ProcessLine (line);
			} catch (Exception e) {
				Logger.Log.Warn ("Could not parse line in '{0}'", File.FullName);
				Logger.Log.Warn (e);
			}
		}
	}
}
284 ///////////////////////////////////////////////////////////////////////////////
287 // Kopete Logs
// Parser for Kopete conversation logs, which are XML files.
//
// The protocol, local identity and remote party are derived from the
// file's path; the messages are read from the file itself (the
// TextReader passed to the constructor is only stored, not read here).
public class KopeteLog : ImLog {

	public const string MimeType = "beagle/x-kopete-log";

	// Fills in Protocol, Identity and SpeakingTo from the file's path
	// and then parses the log via Load ().
	public KopeteLog (FileInfo file, TextReader reader) : base ("kopete", file, reader)
	{
		// FIXME: Artificially split logs into conversations depending on the
		// amount of time elapsed between messages?

		// Figure out the protocol from the parent.parent or parent foldername
		if (file.Directory.Parent.Name.EndsWith ("Protocol"))
			Protocol = file.Directory.Parent.Name.Substring (0, file.Directory.Parent.Name.Length - 8).ToLower ();
		else if (file.Directory.Name.EndsWith ("Protocol"))
			Protocol = file.Directory.Name.Substring (0, file.Directory.Name.Length - 8).ToLower ();
		else
			Protocol = file.Directory.Name;
		Identity = file.Directory.Name;

		// FIXME: This is not safe for all kinds of file/screennames
		// Everything from the last '.' (or, failing that, the last '_')
		// of the extension-less file name onwards is cut off.
		string filename = Path.GetFileNameWithoutExtension (file.Name);
		if (filename.LastIndexOf ('.') > 0)
			SpeakingTo = filename.Substring (0, filename.LastIndexOf ('.'));
		else if (filename.LastIndexOf ('_') > 0)
			SpeakingTo = filename.Substring (0, filename.LastIndexOf ('_'));
		else
			SpeakingTo = filename;
		Logger.Log.Debug ("Speakingto for " + file.Name + " is " + SpeakingTo + ", protocol is " + Protocol);
		Load ();
	}

	// Year and month come from the <date> element; day and time of day
	// come from the "time" attribute of each <msg> element.
	private const string date_format = "yyyy M d H:m:s";

	// Re-reads the whole log from File.  A <date> element sets the year
	// and month that apply to the <msg> elements read after it; each
	// <msg> element with a parsable time and a non-empty "nick" (or,
	// failing that, "from") attribute becomes one utterance.
	//
	// A failure to open the file is logged and leaves the log empty.
	// Exceptions raised while reading (for example malformed XML) are
	// not caught here, but the reader is closed before they propagate.
	protected override void Load ()
	{
		ClearUtterances ();

		XmlReader reader;
		DateTime base_date = DateTime.MinValue;

		try {
			reader = new XmlTextReader (File.Open(
							    FileMode.Open,
							    FileAccess.Read,
							    FileShare.Read));
		} catch (Exception e) {
			// Reported through the logger, like the other parse
			// problems in this file, rather than on stdout.
			Logger.Log.Warn ("Could not open '{0}'", File.FullName);
			Logger.Log.Warn (e);
			return;
		}

		try {
			while (reader.Read ()) {
				if (reader.NodeType != XmlNodeType.Element)
					continue;

				switch (reader.Name) {
				case "date":
					// NOTE(review): the day argument was not visible in the
					// listing this edit was prepared from, so 1 is assumed.
					// Only Year and Month of base_date are read below, so
					// any valid day gives the same result -- confirm
					// against the repository copy.
					base_date = new DateTime (Convert.ToInt32 (reader.GetAttribute ("year")),
								  Convert.ToInt32 (reader.GetAttribute ("month")),
								  1);
					break;

				case "msg":
					// Parse the timestamp of the message
					string timestamp = String.Format (CultureInfo.InvariantCulture,
									  "{0} {1} {2}",
									  base_date.Year,
									  base_date.Month,
									  reader.GetAttribute ("time"));

					// The format needs seconds; add ":00" when the time
					// has fewer than two ':' separators.
					int time_separator_count = 0;
					foreach (char the_char in timestamp)
						if (the_char == ':')
							time_separator_count++;
					if (time_separator_count < 2)
						timestamp = timestamp + ":00";

					DateTime msg_date = DateTime.MinValue;

					try {
						// With a null provider the ':' in the format
						// stands for the current culture's time
						// separator, which is not ':' everywhere.  The
						// log always uses ':', so parse it with the
						// invariant culture.
						msg_date = DateTime.ParseExact (timestamp,
										date_format,
										CultureInfo.InvariantCulture);
					} catch {
						Logger.Log.Error ("Couldn't parse Kopete timestamp: {0}", timestamp);
						break;
					}

					string who = reader.GetAttribute ("nick");
					if (who == null || who == "")
						who = reader.GetAttribute ("from");
					if (who == null || who == "")
						break;

					// Advance to the text node for the actual message
					reader.Read ();

					AddUtterance (msg_date, who, reader.Value);
					break;
				}
			}
		} finally {
			// Release the file even when reading throws.
			reader.Close ();
		}
	}
}