Some more fixes wrt child-indexables. Namely, fix proper handling of child indexables...
[beagle.git] / Util / Mork.cs
blob60d6a08904e2a6ab38431d7717304055581c14ec
1 //
2 // Mork.cs: A parser for mork files (used by software such as Firefox and Thunderbird)
3 //
4 // Copyright (C) 2006 Pierre Östlund
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Text;
30 using System.Collections;
31 using System.Text.RegularExpressions;
33 namespace Beagle.Util
35 public class MorkDatabase : IEnumerable {
// Patterns used to pick rows, cells and table headers out of the raw mork text
protected string regex_row = @"(?<action>[-!+]?)\[(-|)(?<roid>[0-9A-Za-z:\^]+)(?<cells>(?>[^\[\]]+)?)\]";
protected string regex_cell = @"\^(?<key>[0-9A-Fa-f]+)(\^(?<pvalue>[0-9A-Fa-f]+)|=(?<value>[0-9A-Fa-f]+))";
protected string regex_table = @"{.*?:(?<ns>[0-9A-Fa-f\^]+) {\(k\^(?<tbl>[0-9A-Fa-f]+):c\)";

// File being parsed, the version found in its header line, and the namespace
// that GetEnumerator () walks
protected string mork_file;
protected string mork_version;
protected string enum_namespace;

// Parsed content:
//   dicts     - dict id -> value
//   metadicts - metadict id -> value (looked up as column names by Compile ())
//   rows      - namespace -> Hashtable of row id -> raw cell string
//   tables    - row id -> table the row was added with
protected Hashtable dicts;
protected Hashtable metadicts;
protected Hashtable rows;
protected Hashtable tables;
// Creates a database bound to the given mork file. Only the empty lookup tables are
// allocated here; the file is not opened until Read () is called.
public MorkDatabase (string mork_file)
{
	dicts = new Hashtable ();
	metadicts = new Hashtable ();
	rows = new Hashtable ();
	tables = new Hashtable ();

	this.mork_file = mork_file;
}
// Loads and parses the mork file given to the constructor, replacing whatever was
// parsed before. Throws InvalidMorkDatabaseException when the first line is not a
// mork header; in that case the previously loaded content is left untouched.
public void Read ()
{
	string version;
	string content;

	// "using" makes sure the file is closed even when reading it throws
	using (StreamReader reader = new StreamReader (mork_file)) {
		// Check if this is a mork file. We assume the first line will tell us this.
		if (!IsValid (reader.ReadLine (), out version))
			throw new InvalidMorkDatabaseException ("This file is missing a valid mork header");

		content = reader.ReadToEnd ();
	}

	// Reset () also clears mork_version, so the version read from the header
	// must be stored after the reset, not before it.
	Reset ();
	mork_version = version;

	Read (content);
}
// Checks whether "header" is a mork header line. On success the version found in the
// header is stored in "version"; otherwise "version" is null.
protected bool IsValid (string header, out string version)
{
	version = null;
	Regex header_pattern = new Regex (@"<!-- <mdb:mork:z v=\""(?<version>(.*))\""/> -->");

	if (header != null && header != string.Empty) {
		Match match = header_pattern.Match (header);

		if (match.Success) {
			version = match.Groups ["version"].Value;
			return true;
		}
	}

	return false;
}
// Walks the mork text character by character and hands every top-level block to the
// matching Parse* method:
//   "//"            comment, skipped up to the end of the line
//   "<" + any + "<" metadict
//   "<"             dict
//   "{"             table
//   "["             row(s) outside of a table
//   "@$"            group
// Parsing stops quietly at the first block whose end marker is missing.
protected void Read (string content)
{
	if (content == null)
		return;

	int length = content.Length;

	for (int position = 0; position < length; position++) {
		char current = content [position];
		// Look ahead without running off the end of the text
		char next = (position + 1 < length ? content [position + 1] : '\0');
		char second_next = (position + 2 < length ? content [position + 2] : '\0');

		if (current == '/' && next == '/') {
			// Ignore comments. A comment on the last line has no trailing newline;
			// there is nothing left to parse then.
			position = content.IndexOf ('\n', position);
			if (position < 0)
				break;
		} else if (current == '<') {
			int start = FindStartIndex (content, ref position, "<(", ")>");

			// No ")>" found: the block is unterminated
			if (position < 0)
				break;

			if (second_next == '<')
				// Parse metadict information
				ParseMetaDict (start, position, content);
			else
				// Parse dict information
				ParseDict (start, position, content);
		} else if (current == '{') {
			// Parse table information
			ParseTable (Read (content, ref position, "{", "}"));
		} else if (current == '[') {
			// Parse rows
			ParseRows (Read (content, ref position, "[", "]"), null, null);
		} else if (current == '@' && next == '$') {
			// Parse groups
			ParseGroups (Read (content, ref position, "@$${", "@$$}"));
		}

		// A negative position would restart the scan from the beginning
		if (position < 0)
			break;
	}
}
// Returns the block that begins at "position", taking nested start/end pairs into
// account, and moves "position" to the index where the closing "end" marker begins.
// The returned text runs from the original position up to and including the first
// character of that marker. When no closing marker exists the rest of the text is
// returned and "position" is moved to the last character.
protected string Read (string content, ref int position, string start, string end)
{
	int block_start = position;
	int next_start = position;

	do {
		position = content.IndexOf (end, position + 1);

		if (position < 0) {
			// Unterminated block: never hand a negative position back to the
			// caller, it would make it start over from the beginning.
			position = content.Length - 1;
			return content.Substring (block_start);
		}

		// Every "start" found before the current "end" opens a nested block, so
		// another "end" is needed to close the outer one.
		next_start = content.IndexOf (start, next_start + 1);
	} while (next_start >= 0 && next_start < position);

	return content.Substring (block_start, position - block_start + 1);
}
// Same scan as Read (content, ref position, start, end), but without cutting out a
// substring: the return value is the index the block starts at (the position passed in)
// and "position" is left pointing at the end of the block. ParseDict and ParseMetaDict
// take these two indexes, which saves a lot of string allocations.
protected int FindStartIndex (string content, ref int position, string start, string end)
{
	int origin = position;
	int opener = origin;

	while (true) {
		position = content.IndexOf (end, position + 1);
		opener = content.IndexOf (start, opener + 1);

		// Stop once there is no further opener, or the next one lies behind the end found
		if (opener < 0 || opener >= position)
			break;
	}

	return origin;
}
// Parses the dict block found in "dict" between the indexes "start" (the '<' of the
// leading "<(") and "end" (the ')' of the closing ")>"), and stores every id=value
// pair in "dicts".
protected virtual void ParseDict (int start, int end, string dict)
{
	// Everything between the leading "<(" and the closing ')': first character at
	// start+2, last one at end-1.
	int length = end - start - 2;

	if (dict == null || start < 0 || length <= 0 || start + 2 + length > dict.Length)
		return;

	Regex reg = new Regex (@"(?<id>[0-9A-Fa-f]+)\s*=(?<value>(.*))", RegexOptions.Compiled);

	// This is lame, but it's an easy solution that works. It seems like regex fails
	// here when dealing with big amounts of data. So: drop line continuations and
	// newlines, then put every "(id=value)" pair on a line of its own.
	string body = dict.Substring (start + 2, length).Replace ("\\\n", "").Replace ("\n", "");

	foreach (string pair in Regex.Replace (body, @"\)\s*\(", "\n").Split ('\n')) {
		Match m = reg.Match (pair);

		if (m.Success)
			dicts [m.Groups ["id"].Value] = m.Groups ["value"].Value;
	}
}
// Collects every id=value pair found in content [start..end] into "metadicts".
protected virtual void ParseMetaDict (int start, int end, string content)
{
	Regex pair_pattern = new Regex (@"(?<id>[0-9A-Fa-f]+)=(?<value>[^()]+)", RegexOptions.Compiled);
	MatchCollection pairs = pair_pattern.Matches (content.Substring (start, end - start + 1));

	for (int i = 0; i < pairs.Count; i++) {
		Match pair = pairs [i];
		metadicts [pair.Groups ["id"].Value] = pair.Groups ["value"].Value;
	}
}
// Parses one table block: reads the namespace and table id from the table header and
// hands everything after the first '}' (minus the last character of the block) to
// ParseRows. When the header does not match regex_table the rows are still parsed,
// with no namespace or table, the same way Read (content) parses rows outside a table.
protected virtual void ParseTable (string table)
{
	if (table == null)
		return;

	int start = table.IndexOf ('}') + 1;
	int length = table.Length - start - 1;

	// Nothing follows the header, so there are no rows to parse
	if (length <= 0)
		return;

	string ns = null;
	string tbl = null;
	Match m = new Regex (regex_table, RegexOptions.Compiled).Match (table);

	// Match.Result () throws on a failed match, so only read the groups on success
	if (m.Success) {
		ns = m.Groups ["ns"].Value;
		tbl = m.Groups ["tbl"].Value;
	}

	ParseRows (table.Substring (start, length), ns, tbl);
}
// Finds every row in "rows" (after stripping newlines and spaces) and adds it to, or
// removes it from, the database. A row is removed when its action is "-" or when it
// has no cells. A namespace given in the row id ("id:ns") wins over "ns".
protected virtual void ParseRows (string rows, string ns, string table)
{
	Regex row_pattern = new Regex (regex_row, RegexOptions.Compiled);

	foreach (Match row in row_pattern.Matches (Clean (rows))) {
		string action = row.Groups ["action"].Value;
		string cells = row.Groups ["cells"].Value;

		// roid [0] == id, roid [1] == ns
		string[] roid = row.Groups ["roid"].Value.Split (':');
		string id = roid [0];
		string row_ns = (roid.Length > 1 ? roid [1] : ns);

		if (action != "-" && cells != string.Empty)
			AddRow (id, row_ns, table, cells);
		else
			RemoveRow (id, row_ns);
	}
}
// Parses a group: everything after the first "{@" of the block, minus the last
// character of the block, is parsed like top-level content.
protected virtual void ParseGroups (string groups)
{
	int body_start = groups.IndexOf ("{@") + 2;
	int body_length = groups.Length - body_start - 1;

	Read (groups.Substring (body_start, body_length));
}
// Returns "str" with all newline and space characters removed.
protected string Clean (string str)
{
	string without_newlines = str.Replace ("\n", "");

	return without_newlines.Replace (" ", "");
}
// Resolves a namespace to its "^id" form. Null or empty input gives an empty string,
// a namespace that already starts with "^" is returned as is, and a namespace name
// is replaced by "^" plus the metadict id it is stored under. Unknown names are
// returned unchanged.
public string ParseNamespace (string ns)
{
	if (ns == null || ns == string.Empty)
		return string.Empty;

	if (!ns.StartsWith ("^")) {
		foreach (DictionaryEntry entry in metadicts) {
			if ((entry.Value as string) == ns)
				return String.Format ("^{0}", entry.Key);
		}
	}

	return ns;
}
// Adds the cells of row "id" to namespace "ns". Nothing happens when id, namespace,
// table or cells is empty. When the row already exists the new cells are put in front
// of the stored ones. The table of a row is only recorded the first time the id is seen.
public void AddRow (string id, string ns, string table, string cells)
{
	string ns2 = ParseNamespace (ns);

	if (id == string.Empty || ns2 == string.Empty || table == string.Empty || cells == string.Empty)
		return;

	if (!rows.ContainsKey (ns2))
		rows [ns2] = new Hashtable ();

	Hashtable ns_rows = rows [ns2] as Hashtable;
	string merged = cells;

	if (Exists (id, ns2))
		merged = String.Concat (cells, GetCells (id, ns2));

	ns_rows [id] = merged;

	if (!tables.ContainsKey (id))
		tables [id] = table;
}
// Removes row "id" from namespace "ns" and forgets which table it belonged to.
// Nothing happens when the namespace has no rows.
public void RemoveRow (string id, string ns)
{
	string ns2 = ParseNamespace (ns);

	if (rows.ContainsKey (ns2)) {
		(rows [ns2] as Hashtable).Remove (id);
		tables.Remove (id);
	}
}
// Returns the raw cell string stored for row "id" in namespace "ns", or null when the
// namespace or the row does not exist.
public string GetCells (string id, string ns)
{
	if (id == null)
		return null;

	// ParseNamespace () never returns null, but the namespace may well be unknown,
	// in which case there is no row table to look into.
	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;

	return (ns_rows != null ? ns_rows [id] as string : null);
}
// Builds a Hashtable describing row "id" of namespace "ns": one entry per cell, keyed
// by the column name taken from "metadicts", plus the entries "id" and "table".
// Cell values given as a reference (^key^pvalue) are looked up in "dicts", and every
// value is run through Decode (). Returns null when the row does not exist.
public Hashtable Compile (string id, string ns)
{
	if (id == null)
		return null;

	// Look the row up directly so that an unknown namespace gives null, not an exception
	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;
	if (ns_rows == null || !ns_rows.ContainsKey (id))
		return null;

	Hashtable tbl = new Hashtable ();
	string cells = ns_rows [id] as string;

	if (cells != null) {
		Regex reg = new Regex (regex_cell, RegexOptions.Compiled);
		// The encoding is the same for every cell; fetch it once instead of per cell
		System.Text.Encoding encoding = this.Encoding;

		foreach (Match m in reg.Matches (cells)) {
			string key = m.Groups ["key"].Value;
			string pvalue = m.Groups ["pvalue"].Value;
			string value = (pvalue != string.Empty ? (string) dicts [pvalue] : m.Groups ["value"].Value);

			// A Hashtable does not accept null keys; fall back to the raw column id
			// when the column has no entry in metadicts.
			object column = metadicts [key];
			if (column == null)
				column = key;

			tbl [column] = Decode (value, encoding);
		}
	}

	tbl ["id"] = id;
	tbl ["table"] = tables [id];

	return tbl;
}
// Tells whether row "id" exists in namespace "ns". The namespace may be given by name
// or in its "^id" form; an unknown namespace simply gives false.
public bool Exists (string id, string ns)
{
	if (id == null)
		return false;

	// Use the resolved namespace for the lookup; "rows" is keyed by the resolved form
	// (see AddRow), never by the raw name.
	Hashtable ns_rows = rows [ParseNamespace (ns)] as Hashtable;

	return (ns_rows != null && ns_rows.ContainsKey (id));
}
// Returns the number of rows stored in namespace "ns", or -1 when the namespace has
// no rows table.
public int GetRowCount (string ns)
{
	string ns2 = ParseNamespace (ns);

	if (ns2 != null) {
		object ns_rows = rows [ns2];

		if (ns_rows != null)
			return (ns_rows as Hashtable).Count;
	}

	return -1;
}
// Returns the number of rows in namespace "ns" that were recorded for table "table",
// or -1 when the namespace has no rows table.
public int GetRowCount (string ns, string table)
{
	string ns2 = ParseNamespace (ns);

	if (ns2 == null || rows [ns2] == null)
		return -1;

	int matching = 0;

	foreach (DictionaryEntry row in (rows [ns2] as Hashtable)) {
		if ((string) tables [row.Key] == table)
			matching++;
	}

	return matching;
}
// Enumerates the ids of all rows in the namespace selected through EnumNamespace.
// When that namespace has no rows, or the database is Empty, an enumerator over
// nothing is returned, so "foreach" over the database is always safe.
public IEnumerator GetEnumerator ()
{
	Hashtable ns_rows = rows [ParseNamespace (EnumNamespace)] as Hashtable;

	// Returning null here would make every "foreach" over the database throw
	if (ns_rows == null || Empty)
		return (new object [0]).GetEnumerator ();

	return ns_rows.Keys.GetEnumerator ();
}
// Throws away everything that was parsed so far, including the version string.
public void Reset ()
{
	mork_version = string.Empty;

	tables.Clear ();
	rows.Clear ();
	metadicts.Clear ();
	dicts.Clear ();
}
// Converts one or two byte values to a string in "to_encoding". A single byte
// (char2 == -1) is read as UTF-7, a pair of bytes as UTF-8.
public static string Convert (int char1, int char2, System.Text.Encoding to_encoding)
{
	bool single_byte = (char2 == -1);
	System.Text.Encoding from = (single_byte ? System.Text.Encoding.UTF7 : System.Text.Encoding.UTF8);

	byte[] bytes = new byte [single_byte ? 1 : 2];
	bytes [0] = System.Convert.ToByte (char1);
	if (!single_byte)
		bytes [1] = System.Convert.ToByte (char2);

	byte[] converted = System.Text.Encoding.Convert (from, to_encoding, bytes);

	return to_encoding.GetString (converted);
}
// Replaces the "$XX" and "$XX$YY" escapes (XX, YY being two upper-case hex digits) in
// "str" by the characters they stand for, using Convert (). The string is returned
// untouched when it is null or empty, contains no '$', or no encoding is given.
public static string Decode (string str, System.Text.Encoding to_encoding)
{
	bool nothing_to_decode = (str == null || str == string.Empty || to_encoding == null || str.IndexOf ('$') == -1);

	if (nothing_to_decode)
		return str;

	MatchCollection escapes = Regex.Matches (str, @"\$(?<1>[0-9A-F]{2})\$(?<2>[0-9A-F]{2})|\$(?<3>[0-9A-F]{2})");

	foreach (Match escape in escapes) {
		string first = escape.Groups [1].Value;
		string second = escape.Groups [2].Value;
		string single = escape.Groups [3].Value;

		if (first == string.Empty) {
			// One escaped byte
			string encoded = String.Format (@"${0}", single);
			str = str.Replace (encoded, Convert (Thunderbird.Hex2Dec (single), -1, to_encoding));
		} else {
			// Two escaped bytes forming one character
			string encoded = String.Format (@"${0}${1}", first, second);
			str = str.Replace (encoded, Convert (Thunderbird.Hex2Dec (first), Thunderbird.Hex2Dec (second), to_encoding));
		}
	}

	return str;
}
// Total number of rows, summed over all namespaces.
public int Rows {
	get {
		int total = 0;

		foreach (DictionaryEntry ns in rows)
			total += ((Hashtable) ns.Value).Count;

		return total;
	}
}
// Namespace whose row ids GetEnumerator () walks
public string EnumNamespace {
	get {
		return enum_namespace;
	}
	set {
		enum_namespace = value;
	}
}

// Path of the mork file this database was created for
public string Filename {
	get {
		return mork_file;
	}
}

// Version string stored in mork_version
public string Version {
	get {
		return mork_version;
	}
}
// An item with id 1 in namespace 80 always exists, so a database holding fewer than
// two entries counts as empty.
// NOTE(review): "rows" is keyed by namespace (see AddRow), so this looks like it
// counts namespaces rather than items - confirm that is what is intended.
public bool Empty {
	get {
		return !(rows.Count > 1);
	}
}
// Encoding named by the metadict entry "f". Falls back to iso-8859-1 when there is no
// such entry or when the name is not a known encoding.
public System.Text.Encoding Encoding {
	get {
		string name = metadicts ["f"] as string;

		// Only ask for the encoding when there is a name to ask for; a missing entry
		// is the normal case before anything has been parsed and should not cost an
		// exception on every access.
		if (name != null && name != string.Empty) {
			try {
				return System.Text.Encoding.GetEncoding (name);
			} catch (ArgumentException) {
				// Not a valid encoding name: use the default below
			} catch (NotSupportedException) {
				// Encoding not available on this platform: use the default below
			}
		}

		return System.Text.Encoding.GetEncoding ("iso-8859-1");
	}
}
409 public class InvalidMorkDatabaseException : System.Exception {
411 public InvalidMorkDatabaseException (string message) : base (message)