Yet another. Init the gobject type system.
[beagle.git] / Util / Mork.cs
blob99e950af7d7d9c481b81ff2c438c98963078d959
1 //
2 // Mork.cs: A parser for mork files (used by software such as Firefox and Thunderbird)
3 //
4 // Copyright (C) 2006 Pierre Östlund
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Text;
30 using System.Collections;
31 using System.Text.RegularExpressions;
33 namespace Beagle.Util
35 public class MorkDatabase : IEnumerable {
36 protected string mork_file;
37 protected string enum_namespace;
38 protected string mork_version;
40 protected Hashtable dicts;
41 protected Hashtable metadicts;
42 protected Hashtable rows;
43 protected Hashtable tables;
45 protected string regex_row = @"(?<action>[-!+]?)\[(-|)(?<roid>[0-9A-Za-z:\^]+)(?<cells>(?>[^\[\]]+)?)\]";
46 protected string regex_cell = @"\^(?<key>[0-9A-Fa-f]+)(\^(?<pvalue>[0-9A-Fa-f]+)|=(?<value>[0-9A-Fa-f]+))";
47 protected string regex_table = @"{.*?:(?<ns>[0-9A-Fa-f\^]+) {\(k\^(?<tbl>[0-9A-Fa-f]+):c\)";
// Creates a database object bound to the given mork file. Nothing is read
// from disk here; the lookup tables stay empty until Read () fills them.
public MorkDatabase (string mork_file)
{
	dicts = new Hashtable ();
	metadicts = new Hashtable ();
	rows = new Hashtable ();
	tables = new Hashtable ();

	this.mork_file = mork_file;
}
// Loads and parses the file given to the constructor, replacing everything
// that was parsed before.
//
// Throws InvalidMorkDatabaseException when the first line of the file is not
// a mork header. In that case the previously parsed state is left untouched.
public void Read ()
{
	string content;
	string version;

	// The using block closes the reader on every path, including when
	// ReadLine () or ReadToEnd () throws.
	using (StreamReader reader = new StreamReader (mork_file)) {
		// Check if this is a mork file and remember the database version if it is.
		// We assume the first line will tell us this.
		if (!IsValid (reader.ReadLine (), out version))
			throw new InvalidMorkDatabaseException ("This file is missing a valid mork header");

		content = reader.ReadToEnd ();
	}

	// Reset () blanks mork_version, so the version read from the header has to
	// be stored after it, otherwise Version would always be empty.
	Reset ();
	mork_version = version;
	Read (content);
}
// Tests whether header is a mork header line.
//
// header:  the line to test; null and empty strings are rejected.
// version: receives the text of the header's v="..." attribute on success,
//          null otherwise.
// Returns true when the header pattern matched.
protected bool IsValid (string header, out string version)
{
	version = null;

	if (header == null || header.Length == 0)
		return false;

	Regex header_pattern = new Regex (@"<!-- <mdb:mork:z v=\""(?<version>(.*))\""/> -->");
	Match found = header_pattern.Match (header);

	if (found.Success)
		version = found.Groups ["version"].Value;

	return found.Success;
}
// Walks the raw mork text and hands every top-level construct to its parser.
// The construct is recognised from the character(s) at the current position:
//
//   "//"           comment, skipped up to the end of the line
//   '<' . '<'      meta dictionary -> ParseMetaDict
//   '<'            dictionary      -> ParseDict
//   '{'            table           -> ParseTable
//   '['            row(s)          -> ParseRows (without namespace and table)
//   "@$"           group           -> ParseGroups
//
// Any other character is stepped over.
protected void Read (string content)
{
	int position = -1;

	while (++position < content.Length) {
		char current = content [position];

		// Look-ahead characters; '\0' stands in for "past the end of the text"
		// so that a construct starting on the last characters cannot index
		// outside the string.
		char next = (position + 1 < content.Length ? content [position+1] : '\0');
		char after_next = (position + 2 < content.Length ? content [position+2] : '\0');

		if (current == '/' && next == '/') {
			// Ignore comments. A comment on the very last line has no trailing
			// newline; stop there instead of letting IndexOf's -1 restart the
			// scan from the beginning of the text.
			int line_end = content.IndexOf ('\n', position);
			if (line_end < 0)
				break;

			position = line_end;
		} else if (current == '<' && after_next == '<') {
			// Parse metadict information
			ParseMetaDict (Read (content, ref position, "<(", ")>"));
		} else if (current == '<') {
			// Parse dict information
			ParseDict (Read (content, ref position, "<(", ")>"));
		} else if (current == '{') {
			// Parse table information
			ParseTable (Read (content, ref position, "{", "}"));
		} else if (current == '[') {
			// Parse rows
			ParseRows (Read (content, ref position, "[", "]"), null, null);
		} else if (current == '@' && next == '$') {
			// Parse groups
			ParseGroups (Read (content, ref position, "@$${", "@$$}"));
		}
	}
}
// Cuts one delimited construct out of content.
//
// content:  the text being scanned.
// position: on entry the index where the construct begins; on return the
//           index at which the chosen end marker was found.
// start:    opening marker, used to keep track of nested constructs.
// end:      closing marker.
//
// Returns the text from the entry position up to and including the first
// character of the end marker.
protected string Read (string content, ref int position, string start, string end)
{
	int origin = position;
	int next_start = position;

	while (true) {
		position = content.IndexOf (end, position+1);
		next_start = content.IndexOf (start, next_start+1);

		// Done as soon as no further opening marker exists, or the next one
		// lies at or beyond the end marker found in this round.
		if (next_start < 0 || next_start >= position)
			break;
	}

	int length = position-origin+1;
	return content.Substring (origin, length);
}
// Stores every "id=value" entry of a dictionary in dicts, keyed by id.
//
// dict: the dictionary text as cut out by Read (); its first two characters
//       and its last character are dropped before the entries are split.
protected virtual void ParseDict (string dict)
{
	Regex entry_pattern = new Regex (@"(?<id>[0-9A-Fa-f]+)\s*=(?<value>(.*))", RegexOptions.Compiled);

	// According to the original author a regex over the whole dictionary fails
	// on big amounts of data, so the text is first rewritten to hold one entry
	// per line: line continuations and newlines are removed, then every
	// ")...(" boundary between two entries becomes a newline.
	string body = dict.Substring (2, dict.Length-3);
	string single_line = body.Replace ("\\\n", "").Replace ("\n", "");
	string[] entries = Regex.Replace (single_line, @"\)\s*\(", "\n").Split ('\n');

	foreach (string entry in entries) {
		Match m = entry_pattern.Match (entry);
		if (!m.Success)
			continue;

		dicts [m.Groups ["id"].Value] = m.Groups ["value"].Value;
	}
}
// Stores every "id=value" pair found in a meta dictionary in metadicts, keyed
// by id. A value extends up to the next parenthesis.
protected virtual void ParseMetaDict (string metadict)
{
	Regex pair_pattern = new Regex (@"(?<id>[0-9A-Fa-f]+)=(?<value>[^()]+)", RegexOptions.Compiled);
	MatchCollection pairs = pair_pattern.Matches (metadict);

	for (int i = 0; i < pairs.Count; i++) {
		GroupCollection groups = pairs [i].Groups;
		metadicts [groups ["id"].Value] = groups ["value"].Value;
	}
}
// Parses one table: the namespace and table id are taken from the table
// header (regex_table), everything after the first '}' except the last
// character is handed to ParseRows as row data.
protected virtual void ParseTable (string table)
{
	Regex header_pattern = new Regex (regex_table, RegexOptions.Compiled);
	Match header = header_pattern.Match (table);

	int rows_start = table.IndexOf ('}')+1;
	int rows_length = table.Length-rows_start-1;
	string row_data = table.Substring (rows_start, rows_length);

	// Match.Result () is kept here on purpose instead of Groups [...]: this
	// method does not check header.Success, and the two accessors do not
	// behave the same on a failed match.
	string ns = header.Result ("${ns}");
	string tbl = header.Result ("${tbl}");

	ParseRows (row_data, ns, tbl);
}
// Applies every row found in the given text to the database.
//
// rows:  row text; newlines and spaces are stripped (Clean) before matching.
// ns:    namespace used for rows whose own id carries no ":namespace" part.
// table: id of the table the rows belong to, passed through to AddRow.
//
// A row with the "-" action or without any cells is removed, every other row
// is added.
protected virtual void ParseRows (string rows, string ns, string table)
{
	Regex row_pattern = new Regex (regex_row, RegexOptions.Compiled);

	foreach (Match row in row_pattern.Matches (Clean (rows))) {
		string cells = row.Groups ["cells"].Value;
		bool removal = (row.Groups ["action"].Value == "-" || cells == string.Empty);

		// parts [0] == id, parts [1] == ns (when present)
		string[] parts = row.Groups ["roid"].Value.Split (':');
		string row_id = parts [0];
		string row_ns = (parts.Length > 1 ? parts [1] : ns);

		if (removal)
			RemoveRow (row_id, row_ns);
		else
			AddRow (row_id, row_ns, table, cells);
	}
}
// Parses the content of a group: the text following the first "{@" up to,
// but not including, the last character is fed back into Read (string).
protected virtual void ParseGroups (string groups)
{
	int body_start = groups.IndexOf ("{@");
	body_start += 2;

	int body_length = groups.Length - body_start - 1;
	string body = groups.Substring (body_start, body_length);

	Read (body);
}
// Returns str with every newline and every space character removed.
protected string Clean (string str)
{
	string without_newlines = str.Replace ("\n", "");
	string without_spaces = without_newlines.Replace (" ", "");

	return without_spaces;
}
// Translates a namespace name into its "^id" form.
//
// ns: either an id that already starts with '^' (returned unchanged) or a
//     name that is looked up among the values of metadicts.
//
// Returns "^" followed by the metadict key whose value equals ns, ns itself
// when no such entry exists, and null when ns is null. Callers in this class
// test the result against null, so null has to pass through instead of
// raising a NullReferenceException.
public string ParseNamespace (string ns)
{
	if (ns == null)
		return null;

	if (ns.Length > 0 && ns [0] == '^')
		return ns;

	foreach (DictionaryEntry entry in metadicts) {
		if ((entry.Value as string) == ns)
			return String.Format ("^{0}", entry.Key);
	}

	return ns;
}
// Adds the cells of a row to the database.
//
// id:    row id.
// ns:    namespace of the row, as "^id" or as a name known to ParseNamespace.
// table: id of the table the row belongs to; may be null (ParseRows passes
//        null for rows that are not part of a table).
// cells: raw cell text of the row.
//
// The call is ignored when id, ns or cells is null or empty, or when table is
// an empty string. The first table seen for an id is remembered in tables.
public void AddRow (string id, string ns, string table, string cells)
{
	// Checked before ParseNamespace so that a row without any namespace is
	// skipped instead of raising a null reference/argument exception.
	if (id == null || id == string.Empty || ns == null || ns == string.Empty
	    || table == string.Empty || cells == null || cells == string.Empty)
		return;

	string ns2 = ParseNamespace (ns);

	if (ns2 == string.Empty)
		return;

	Hashtable ns_rows = rows [ns2] as Hashtable;
	if (ns_rows == null) {
		ns_rows = new Hashtable ();
		rows [ns2] = ns_rows;
	}

	// String.Concat treats a missing (null) previous value as empty, so this
	// covers both a new row and an update of an existing one.
	// NOTE(review): the new cells are placed in front of the existing ones,
	// while Compile () lets later cells overwrite earlier ones with the same
	// key - confirm that old values are really meant to win over updates.
	ns_rows [id] = String.Concat (cells, ns_rows [id] as string);

	if (!tables.ContainsKey (id))
		tables [id] = table;
}
// Removes a row, and the table association stored for its id, from the
// database. Nothing happens when the namespace holds no rows or when id or ns
// is null (ParseRows passes a null namespace for rows outside of a table
// whose id carries no namespace part).
public void RemoveRow (string id, string ns)
{
	if (id == null || ns == null)
		return;

	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;
	if (ns_rows == null)
		return;

	ns_rows.Remove (id);
	tables.Remove (id);
}
// Returns the raw cell text stored for a row, or null when the row, its
// namespace or one of the arguments is missing. An unknown namespace used to
// end in a NullReferenceException.
public string GetCells (string id, string ns)
{
	if (id == null || ns == null)
		return null;

	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;

	return (ns_rows != null ? ns_rows [id] as string : null);
}
// Builds a table of decoded values for one row.
//
// id: row id.
// ns: namespace of the row.
//
// Returns null when the row does not exist. Otherwise returns a Hashtable
// that maps the metadicts entry of every cell key to the decoded cell value,
// plus the entries "id" (the row id) and "table" (the table stored for the
// id). A cell written as ^key^ref takes its value from dicts, a cell written
// as ^key=value uses the literal value.
public Hashtable Compile (string id, string ns)
{
	string ns2 = ParseNamespace (ns);

	if (!Exists (id, ns2))
		return null;

	Hashtable tbl = new Hashtable ();
	Regex reg = new Regex (regex_cell, RegexOptions.Compiled);

	// The Encoding property performs a lookup (and possibly exception
	// handling) on every access and cannot change while the cells are being
	// decoded, so it is evaluated once.
	System.Text.Encoding encoding = Encoding;

	foreach (Match m in reg.Matches (GetCells (id, ns2))) {
		// A cell key without an entry in metadicts cannot be stored, because
		// Hashtable does not accept null keys; such cells are skipped.
		object column = metadicts [m.Groups ["key"].Value];
		if (column == null)
			continue;

		string pvalue = m.Groups ["pvalue"].Value;
		string value = (pvalue != string.Empty ? (string) dicts [pvalue] : m.Groups ["value"].Value);

		tbl [column] = Decode (value, encoding);
	}

	tbl ["id"] = id;
	tbl ["table"] = tables [id];

	return tbl;
}
// Tells whether a row with the given id is stored in the given namespace.
// Returns false for an unknown namespace and for null arguments.
public bool Exists (string id, string ns)
{
	if (id == null || ns == null)
		return false;

	// The lookup has to use the translated namespace: rows is keyed by the
	// result of ParseNamespace (see AddRow), not by the name passed in.
	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;

	return (ns_rows != null && ns_rows.ContainsKey (id));
}
// Returns the number of rows stored in a namespace, or -1 when nothing is
// stored for that namespace.
public int GetRowCount (string ns)
{
	string ns2 = ParseNamespace (ns);

	if (ns2 == null)
		return -1;

	object ns_rows = rows [ns2];
	if (ns_rows == null)
		return -1;

	return (ns_rows as Hashtable).Count;
}
// Returns how many rows of a namespace are associated with the given table
// id, or -1 when nothing is stored for that namespace.
public int GetRowCount (string ns, string table)
{
	string ns2 = ParseNamespace (ns);

	if (ns2 == null || rows [ns2] == null)
		return -1;

	Hashtable ns_rows = rows [ns2] as Hashtable;
	int matching = 0;

	foreach (string row_id in ns_rows.Keys) {
		string owner = (string) tables [row_id];

		if (owner == table)
			matching += 1;
	}

	return matching;
}
// Enumerates the ids of all rows stored in the namespace selected through
// EnumNamespace.
//
// When no namespace is selected, the namespace holds no rows or the database
// is Empty, an enumerator over nothing is returned. Returning null instead
// makes a foreach over the database fail with a NullReferenceException.
public IEnumerator GetEnumerator ()
{
	Hashtable ns_rows = null;

	if (EnumNamespace != null && !Empty)
		ns_rows = rows [ParseNamespace (EnumNamespace)] as Hashtable;

	if (ns_rows == null)
		return new ArrayList ().GetEnumerator ();

	return ns_rows.Keys.GetEnumerator ();
}
// Forgets everything that has been parsed: all four lookup tables are
// emptied and the stored version becomes an empty string.
public void Reset ()
{
	Hashtable[] stores = new Hashtable[] { dicts, metadicts, rows, tables };

	foreach (Hashtable store in stores)
		store.Clear ();

	mork_version = string.Empty;
}
// Converts one or two byte values into a string in the target encoding.
//
// char1:       first byte value.
// char2:       second byte value, or -1 when there is only one byte.
// to_encoding: encoding the bytes are converted to and read back with.
//
// A single byte is interpreted as UTF-7 input, a pair of bytes as UTF-8.
public static string Convert (int char1, int char2, System.Text.Encoding to_encoding)
{
	bool single_byte = (char2 == -1);
	System.Text.Encoding source = (single_byte ? System.Text.Encoding.UTF7 : System.Text.Encoding.UTF8);

	byte[] raw = new byte [single_byte ? 1 : 2];
	raw [0] = System.Convert.ToByte (char1);
	if (!single_byte)
		raw [1] = System.Convert.ToByte (char2);

	byte[] converted = System.Text.Encoding.Convert (source, to_encoding, raw);

	return to_encoding.GetString (converted);
}
// Replaces the $XX escapes of a mork value with the characters they encode.
//
// str:         the value to decode.
// to_encoding: target encoding handed to Convert ().
//
// Two directly adjacent escapes ($XX$YY) are converted together as a byte
// pair, a lone escape ($XX) as a single byte. The string is returned as-is
// when it is null or empty, contains no '$', or when to_encoding is null.
public static string Decode (string str, System.Text.Encoding to_encoding)
{
	if (str == null || str == string.Empty || to_encoding == null || str.IndexOf ('$') == -1)
		return str;

	// Every escape is replaced exactly at the place where it was matched.
	// Replacing by text (String.Replace) would also rewrite identical escapes
	// elsewhere in the string, including ones that form one half of another
	// byte pair, and thereby corrupt those pairs.
	StringBuilder decoded = new StringBuilder (str.Length);
	int copied_to = 0;

	foreach (Match m in Regex.Matches (str, @"\$(?<1>[0-9A-F]{2})\$(?<2>[0-9A-F]{2})|\$(?<3>[0-9A-F]{2})")) {
		string char1 = m.Groups [1].Value, char2 = m.Groups [2].Value, char3 = m.Groups [3].Value;

		// Plain text between the previous escape and this one
		decoded.Append (str, copied_to, m.Index - copied_to);

		if (char1 != string.Empty)
			decoded.Append (Convert (Thunderbird.Hex2Dec (char1), Thunderbird.Hex2Dec (char2), to_encoding));
		else
			decoded.Append (Convert (Thunderbird.Hex2Dec (char3), -1, to_encoding));

		copied_to = m.Index + m.Length;
	}

	// Plain text after the last escape
	decoded.Append (str, copied_to, str.Length - copied_to);

	return decoded.ToString ();
}
// Total number of rows stored, summed over all namespaces.
public int Rows {
	get {
		int total = 0;

		foreach (object ns_rows in rows.Values)
			total += ((Hashtable) ns_rows).Count;

		return total;
	}
}
// Namespace whose row ids GetEnumerator () walks over.
public string EnumNamespace {
	get {
		return enum_namespace;
	}
	set {
		enum_namespace = value;
	}
}
// Path of the mork file this database was created for.
public string Filename {
	get {
		return mork_file;
	}
}
// Current content of the mork_version field.
public string Version {
	get {
		return mork_version;
	}
}
// True when rows holds at most one entry.
//
// Reasoning of the original author: there will always exist an item with
// id 1 in namespace 80, so a database with less than two items is empty.
// NOTE(review): rows.Count counts the top-level entries of rows, which
// AddRow keys by namespace - confirm that this is the intended "item" count.
public bool Empty {
	get {
		return rows.Count <= 1;
	}
}
// Encoding named by the "f" entry of metadicts. Falls back to iso-8859-1
// when that entry is missing, empty or does not name an encoding this
// platform knows.
public System.Text.Encoding Encoding {
	get {
		string name = metadicts ["f"] as string;

		// A missing entry is the expected case for some files; test for it
		// instead of provoking and swallowing an exception.
		if (name != null && name.Length > 0) {
			try {
				return System.Text.Encoding.GetEncoding (name);
			} catch (ArgumentException) {
				// Not a valid encoding name: use the fallback below
			} catch (NotSupportedException) {
				// Encoding not available on this platform: use the fallback below
			}
		}

		return System.Text.Encoding.GetEncoding ("iso-8859-1");
	}
}
392 public class InvalidMorkDatabaseException : System.Exception {
394 public InvalidMorkDatabaseException (string message) : base (message)