Fixed #374055:Only the first "tag" is detected in digikam.
[beagle.git] / Util / Mork.cs
blob11fa5e9155906e277cdafcca895f9fd4a41f62f3
1 //
2 // Mork.cs: A parser for mork files (used by software such as Firefox and Thunderbird)
3 //
4 // Copyright (C) 2006 Pierre Östlund
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Text;
30 using System.Collections;
31 using System.Text.RegularExpressions;
33 namespace Beagle.Util
35 public class MorkDatabase : IEnumerable {
36 protected string mork_file;
37 protected string enum_namespace;
38 protected string mork_version;
40 protected Hashtable dicts;
41 protected Hashtable metadicts;
42 protected Hashtable rows;
43 protected Hashtable tables;
45 protected string regex_row = @"(?<action>[-!+]?)\[(-|)(?<roid>[0-9A-Za-z:\^]+)(?<cells>(?>[^\[\]]+)?)\]";
46 protected string regex_cell = @"\^(?<key>[0-9A-Fa-f]+)(\^(?<pvalue>[0-9A-Fa-f]+)|=(?<value>[0-9A-Fa-f]+))";
47 protected string regex_table = @"{.*?:(?<ns>[0-9A-Fa-f\^]+) {\(k\^(?<tbl>[0-9A-Fa-f]+):c\)";
// Creates a database object bound to the given mork file. Nothing is read
// here; the constructor only stores the path and allocates the (empty)
// lookup tables that the parser fills in later.
public MorkDatabase (string mork_file)
{
	dicts = new Hashtable ();
	metadicts = new Hashtable ();
	rows = new Hashtable ();
	tables = new Hashtable ();

	this.mork_file = mork_file;
}
// Opens the mork file given to the constructor, validates its header line and
// parses the remainder of the file into this database. Any previously loaded
// state is discarded first.
//
// Throws InvalidMorkDatabaseException when the first line is not a mork header.
public void Read ()
{
	string content;
	string version;

	// "using" guarantees the file handle is released even if reading throws
	using (StreamReader reader = new StreamReader (mork_file)) {
		// Check if this is a mork file. We assume the first line will tell us this.
		if (!IsValid (reader.ReadLine (), out version))
			throw new InvalidMorkDatabaseException ("This file is missing a valid mork header");

		content = reader.ReadToEnd ();
	}

	// Reset () blanks mork_version, so the version parsed from the header
	// has to be stored after the reset, not before it.
	Reset ();
	mork_version = version;

	Read (content);
}
// Tests whether "header" is a mork header line. On success the version string
// found inside the header is handed back through "version"; otherwise
// "version" is null and false is returned.
protected bool IsValid (string header, out string version)
{
	version = null;

	if (header != null && header != string.Empty) {
		Match match = Regex.Match (header, @"<!-- <mdb:mork:z v=\""(?<version>(.*))\""/> -->");

		if (match.Success) {
			version = match.Groups ["version"].Value;
			return true;
		}
	}

	return false;
}
// Walks over "content" one character at a time and dispatches every top-level
// structure it recognises (comment, metadict, dict, table, row, group) to the
// matching Parse* method. Characters that do not start a structure are skipped.
protected void Read (string content)
{
	int length = content.Length;
	int position = 0;

	while (position < length) {
		char current = content [position];

		if (current == '/' && position+1 < length && content [position+1] == '/') {
			// Ignore comments: jump to the end of the current line
			position = content.IndexOf ('\n', position);

			// A comment on the last line has no terminating newline. Without this
			// check the -1 would be incremented to 0 and the scan would start over.
			if (position < 0)
				break;
		} else if (current == '<' && position+2 < length && content [position+2] == '<') {
			// Parse metadict information
			ParseMetaDict (Read (content, ref position, "<(", ")>"));
		} else if (current == '<') {
			// Parse dict information
			ParseDict (Read (content, ref position, "<(", ")>"));
		} else if (current == '{') {
			// Parse table information
			ParseTable (Read (content, ref position, "{", "}"));
		} else if (current == '[') {
			// Parse rows
			ParseRows (Read (content, ref position, "[", "]"), null, null);
		} else if (current == '@' && position+1 < length && content [position+1] == '$') {
			// Parse groups
			ParseGroups (Read (content, ref position, "@$${", "@$$}"));
		}

		// Step past the character (or the end marker) that was just handled
		position++;
	}
}
// Extracts one (possibly nested) structure from "content". The structure begins
// at "position"; for every further "start" marker that appears before the
// current "end" marker, one more "end" marker is consumed. On return "position"
// is the index of the final "end" marker, and the returned text runs from the
// original position up to and including the first character of that marker.
protected string Read (string content, ref int position, string start, string end)
{
	int origin = position;
	int opener = position;

	while (true) {
		position = content.IndexOf (end, position+1);
		opener = content.IndexOf (start, opener+1);

		// Stop once no further opener exists, or the next one lies beyond the end marker
		if (opener < 0 || opener >= position)
			break;
	}

	return content.Substring (origin, position-origin+1);
}
// Parses a dict block and stores every id/value pair it contains in "dicts".
// The argument is expected to still carry its two leading characters and one
// trailing character, which are stripped here.
protected virtual void ParseDict (string dict)
{
	Regex entry = new Regex (@"(?<id>[0-9A-Fa-f]+)\s*=(?<value>(.*))", RegexOptions.Compiled);

	// Drop the surrounding markers and join everything onto a single line
	// (escaped line breaks first, then plain ones).
	string body = dict.Substring (2, dict.Length-3);
	body = body.Replace ("\\\n", "").Replace ("\n", "");

	// Turn every ")(" cell boundary into a newline and split on it. A single
	// regex over the whole block was avoided by the original author because it
	// reportedly failed on large inputs.
	string[] entries = Regex.Replace (body, @"\)\s*\(", "\n").Split ('\n');

	for (int i = 0; i < entries.Length; i++) {
		Match match = entry.Match (entries [i]);

		if (!match.Success)
			continue;

		dicts [match.Groups ["id"].Value] = match.Groups ["value"].Value;
	}
}
// Collects every "id=value" pair found in the metadict block and stores it in
// "metadicts", keyed by id. A later pair with the same id replaces an earlier one.
protected virtual void ParseMetaDict (string metadict)
{
	MatchCollection pairs = Regex.Matches (metadict, @"(?<id>[0-9A-Fa-f]+)=(?<value>[^()]+)", RegexOptions.Compiled);

	foreach (Match pair in pairs) {
		string id = pair.Groups ["id"].Value;
		metadicts [id] = pair.Groups ["value"].Value;
	}
}
// Parses a table block: the namespace and table id are read from the table
// header with regex_table, and everything after the first '}' (minus the last
// character of the block) is handed to ParseRows.
protected virtual void ParseTable (string table)
{
	int body_start = table.IndexOf ('}')+1;
	Match header = Regex.Match (table, regex_table, RegexOptions.Compiled);

	string body = table.Substring (body_start, table.Length-body_start-1);
	string ns = header.Result ("${ns}");
	string tbl = header.Result ("${tbl}");

	ParseRows (body, ns, tbl);
}
// Finds every row in "rows" (after whitespace has been removed with Clean) and
// either adds it to or removes it from the database. "ns" is the namespace
// used for rows that do not name one themselves, and "table" is the table the
// rows belong to.
protected virtual void ParseRows (string rows, string ns, string table)
{
	MatchCollection found = Regex.Matches (Clean (rows), regex_row, RegexOptions.Compiled);

	foreach (Match row in found) {
		string cells = row.Groups ["cells"].Value;

		// The row object id is either "id" or "id:ns"
		string[] roid = row.Groups ["roid"].Value.Split (':');
		string id = roid [0];
		string row_ns = (roid.Length > 1 ? roid [1] : ns);

		// A leading '-' or a row without any cells means the row is deleted
		bool remove = (row.Groups ["action"].Value == "-" || cells == string.Empty);

		if (remove)
			RemoveRow (id, row_ns);
		else
			AddRow (id, row_ns, table, cells);
	}
}
// Parses a group block by re-running the top-level parser on its body, i.e. on
// the text after the first "{@" up to (but excluding) the block's last character.
protected virtual void ParseGroups (string groups)
{
	int body_start = groups.IndexOf ("{@")+2;
	int body_length = groups.Length-body_start-1;
	string body = groups.Substring (body_start, body_length);

	Read (body);
}
// Returns "str" with every newline and every space character removed.
protected string Clean (string str)
{
	string without_newlines = str.Replace ("\n", "");
	string without_spaces = without_newlines.Replace (" ", "");

	return without_spaces;
}
// Translates a namespace into the form used as key in "rows". Null or empty
// input yields an empty string and a namespace that already starts with '^'
// is returned unchanged. Any other value is looked up among the metadict
// values; if found, "^" plus the matching metadict id is returned, otherwise
// the input is returned as it is.
public string ParseNamespace (string ns)
{
	if (ns == null || ns == string.Empty)
		return string.Empty;

	if (!ns.StartsWith ("^")) {
		foreach (string key in metadicts.Keys) {
			string name = metadicts [key] as string;

			if (name == ns)
				return String.Format ("^{0}", key);
		}
	}

	return ns;
}
// Stores the cells of a row under its (resolved) namespace and remembers which
// table the row belongs to. Nothing happens when the id, the resolved
// namespace, the table or the cells are an empty string.
public void AddRow (string id, string ns, string table, string cells)
{
	string ns2 = ParseNamespace (ns);

	if (id == string.Empty || ns2 == string.Empty || table == string.Empty || cells == string.Empty)
		return;

	// First row in this namespace: create its row table
	if (!rows.ContainsKey (ns2))
		rows [ns2] = new Hashtable ();

	// NOTE(review): for an already known row the new cells are placed in front
	// of the stored ones. Compile () lets later cells overwrite earlier ones,
	// so the older value wins for a repeated column - confirm this is intended.
	string merged = cells;
	if (Exists (id, ns2))
		merged = String.Concat (cells, GetCells (id, ns2));

	(rows [ns2] as Hashtable) [id] = merged;

	// Only the first table seen for an id is remembered
	if (!tables.ContainsKey (id))
		tables [id] = table;
}
// Deletes a row, together with its table association, from the database. The
// call is a no-op when the (resolved) namespace holds no rows at all.
public void RemoveRow (string id, string ns)
{
	string ns2 = ParseNamespace (ns);

	if (rows.ContainsKey (ns2)) {
		Hashtable ns_rows = rows [ns2] as Hashtable;

		ns_rows.Remove (id);
		tables.Remove (id);
	}
}
// Returns the raw (unparsed) cell string stored for row "id" in namespace "ns",
// or null when the namespace or the row is unknown.
public string GetCells (string id, string ns)
{
	// ParseNamespace never returns null, so the lookup result is what has to be
	// checked: an unknown namespace yields null here instead of a crash.
	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;

	if (ns_rows == null || id == null)
		return null;

	return ns_rows [id] as string;
}
// Builds a column-name -> value table for row "id" in namespace "ns". Values
// that refer to a dict entry are resolved through "dicts", and all values are
// passed through Decode with the database encoding. The result additionally
// holds the row id under "id" and its table under "table".
//
// Returns null when the namespace or the row does not exist.
public Hashtable Compile (string id, string ns)
{
	string ns2 = ParseNamespace (ns);

	// Look the row up directly so that an unknown namespace gives null
	Hashtable ns_rows = rows [ns2] as Hashtable;
	if (id == null || ns_rows == null || !ns_rows.ContainsKey (id))
		return null;

	string cells = ns_rows [id] as string;
	if (cells == null)
		cells = string.Empty;

	Hashtable tbl = new Hashtable ();
	Regex reg = new Regex (regex_cell, RegexOptions.Compiled);

	// The encoding does not change while compiling; look it up only once
	System.Text.Encoding encoding = Encoding;

	foreach (Match m in reg.Matches (cells)) {
		// A column id that is missing from the metadict cannot be named; skip
		// the cell rather than using a null Hashtable key (which would throw).
		object column = metadicts [m.Groups ["key"].Value];
		if (column == null)
			continue;

		// "^key^pvalue" refers to a dict entry, "^key=value" carries the value inline
		string pvalue = m.Groups ["pvalue"].Value;
		string value = (pvalue != string.Empty ? dicts [pvalue] as string : m.Groups ["value"].Value);

		tbl [column] = Decode (value, encoding);
	}

	tbl ["id"] = id;
	tbl ["table"] = tables [id];

	return tbl;
}
// Tells whether a row with the given id is stored in namespace "ns". Unknown
// namespaces and a null id simply yield false.
public bool Exists (string id, string ns)
{
	if (id == null)
		return false;

	// The lookup must use the resolved namespace, since that is the key AddRow
	// stores rows under; the raw "ns" only matches when it is already resolved.
	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;

	return (ns_rows != null && ns_rows.ContainsKey (id));
}
// Number of rows stored in namespace "ns", or -1 when the namespace holds no
// row table.
public int GetRowCount (string ns)
{
	string ns2 = ParseNamespace (ns);

	if (ns2 != null && rows [ns2] != null)
		return (rows [ns2] as Hashtable).Count;

	return -1;
}
// Number of rows in namespace "ns" that belong to "table", or -1 when the
// namespace holds no row table.
public int GetRowCount (string ns, string table)
{
	string ns2 = ParseNamespace (ns);

	if (ns2 == null || rows [ns2] == null)
		return -1;

	Hashtable ns_rows = rows [ns2] as Hashtable;
	int matching = 0;

	foreach (string id in ns_rows.Keys)
		matching += (string.Equals ((string) tables [id], table) ? 1 : 0);

	return matching;
}
// Enumerates the ids of all rows in the namespace selected through
// EnumNamespace. When that namespace holds no rows, or the database counts as
// empty, an enumerator over nothing is returned.
public IEnumerator GetEnumerator ()
{
	Hashtable ns_rows = rows [ParseNamespace (EnumNamespace)] as Hashtable;

	// Never hand out null: "foreach" calls MoveNext on the result right away
	// and would fail with a NullReferenceException.
	if (ns_rows == null || Empty)
		return (new object [0]).GetEnumerator ();

	return ns_rows.Keys.GetEnumerator ();
}
// Throws away everything that has been parsed so far: all four lookup tables
// are emptied and the stored mork version is set to an empty string.
public void Reset ()
{
	Hashtable[] stores = new Hashtable[] { dicts, metadicts, rows, tables };

	foreach (Hashtable store in stores)
		store.Clear ();

	mork_version = string.Empty;
}
// Turns one or two byte values into text. With char2 == -1 the single byte
// char1 is read as UTF-7, otherwise the byte pair char1/char2 is read as
// UTF-8. The bytes are converted to "to_encoding" and decoded with it.
public static string Convert (int char1, int char2, System.Text.Encoding to_encoding)
{
	bool single = (char2 == -1);

	System.Text.Encoding from = (single ? System.Text.Encoding.UTF7 : System.Text.Encoding.UTF8);

	byte[] bytes = new byte [single ? 1 : 2];
	bytes [0] = System.Convert.ToByte (char1);
	if (!single)
		bytes [1] = System.Convert.ToByte (char2);

	byte[] converted = System.Text.Encoding.Convert (from, to_encoding, bytes);

	return to_encoding.GetString (converted);
}
// Replaces the "$XX" escapes of a mork value ("$XX$YY" pairs and lone "$XX"
// sequences, XX being two upper-case hex digits) with the characters produced
// by Convert. The string is returned untouched when it is null or empty,
// contains no '$', or when no target encoding is given.
public static string Decode (string str, System.Text.Encoding to_encoding)
{
	if (str == null || str == string.Empty || to_encoding == null || str.IndexOf ('$') == -1)
		return str;

	StringBuilder decoded = new StringBuilder (str.Length);
	int copied = 0;

	// Every escape is replaced exactly where it was matched. Replacing all
	// occurrences of the matched text in the whole string would also rewrite
	// parts of later, different escapes that merely share a "$XX" token.
	foreach (Match m in Regex.Matches (str, @"\$(?<1>[0-9A-F]{2})\$(?<2>[0-9A-F]{2})|\$(?<3>[0-9A-F]{2})")) {
		// Literal text between the previous escape and this one
		decoded.Append (str, copied, m.Index-copied);

		if (m.Groups [1].Success) {
			decoded.Append (Convert (Thunderbird.Hex2Dec (m.Groups [1].Value),
				Thunderbird.Hex2Dec (m.Groups [2].Value), to_encoding));
		} else {
			decoded.Append (Convert (Thunderbird.Hex2Dec (m.Groups [3].Value), -1, to_encoding));
		}

		copied = m.Index+m.Length;
	}

	// Literal text after the last escape
	decoded.Append (str, copied, str.Length-copied);

	return decoded.ToString ();
}
// Total number of rows in the database, summed over all namespaces.
public int Rows {
	get {
		int total = 0;

		foreach (DictionaryEntry entry in rows)
			total += ((Hashtable) entry.Value).Count;

		return total;
	}
}
// Namespace whose row ids GetEnumerator () walks over.
public string EnumNamespace {
	get {
		return enum_namespace;
	}
	set {
		enum_namespace = value;
	}
}

// Path of the mork file this database was created for.
public string Filename {
	get {
		return mork_file;
	}
}

// Version string stored in mork_version.
public string Version {
	get {
		return mork_version;
	}
}
// An item with id 1 in namespace 80 always exists, so a database with fewer
// than two entries in "rows" is regarded as empty.
public bool Empty {
	get {
		return !(rows.Count > 1);
	}
}
// Encoding used to decode cell values: the one named by metadict entry "f",
// or iso-8859-1 when that entry is missing or does not name an encoding the
// runtime knows.
public System.Text.Encoding Encoding {
	get {
		string name = metadicts ["f"] as string;

		// A missing charset entry is the ordinary case, not an error, so it is
		// handled without going through an exception.
		if (name != null && name != string.Empty) {
			try {
				return System.Text.Encoding.GetEncoding (name);
			} catch (ArgumentException) {
				// Unknown encoding name: fall back to the default below
			} catch (NotSupportedException) {
				// Encoding not available on this platform: fall back as well
			}
		}

		return System.Text.Encoding.GetEncoding ("iso-8859-1");
	}
}
394 public class InvalidMorkDatabaseException : System.Exception {
396 public InvalidMorkDatabaseException (string message) : base (message)