2 // Mork.cs: A parser for mork files (used by software such as Firefox and Thunderbird)
4 // Copyright (C) 2006 Pierre Östlund
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 using System
.Collections
;
31 using System
.Text
.RegularExpressions
;
35 public class MorkDatabase
: IEnumerable
{
/// <summary>Path of the mork file, as handed to the constructor.</summary>
protected string mork_file;

/// <summary>Namespace used when the database is enumerated (exposed as EnumNamespace).</summary>
protected string enum_namespace;

/// <summary>Version string extracted from the mork header line.</summary>
protected string mork_version;

/// <summary>Dictionary entries (id to value) collected by ParseDict.</summary>
protected Hashtable dicts;

/// <summary>Meta dictionary entries (id to name) collected by ParseMetaDict.</summary>
protected Hashtable metadicts;

/// <summary>Maps a namespace key ("^id") to a Hashtable of row id to raw cell text.</summary>
protected Hashtable rows;

/// <summary>Table lookup keyed by row id; read by Compile to fill the "table" entry.</summary>
protected Hashtable tables;

/// <summary>Pattern for one row: optional action, row id (with optional namespace) and its cells.</summary>
protected string regex_row = @"(?<action>[-!+]?)\[(-|)(?<roid>[0-9A-Za-z:\^]+)(?<cells>(?>[^\[\]]+)?)\]";

/// <summary>Pattern for one cell: a column key followed by a dictionary reference or a literal value.</summary>
protected string regex_cell = @"\^(?<key>[0-9A-Fa-f]+)(\^(?<pvalue>[0-9A-Fa-f]+)|=(?<value>[0-9A-Fa-f]+))";

/// <summary>Pattern for a table header: captures the namespace and the table id.</summary>
protected string regex_table = @"{.*?:(?<ns>[0-9A-Fa-f\^]+) {\(k\^(?<tbl>[0-9A-Fa-f]+):c\)";
49 public MorkDatabase (string mork_file
)
51 this.mork_file
= mork_file
;
52 this.dicts
= new Hashtable ();
53 this.metadicts
= new Hashtable ();
54 this.rows
= new Hashtable ();
55 this.tables
= new Hashtable ();
61 StreamReader reader
= new StreamReader (mork_file
);;
63 // Check if this is a mork file and save database version if it is. We assume the first line will tell us this.
64 if (!IsValid (reader
.ReadLine (), out mork_version
)) {
66 throw new InvalidMorkDatabaseException ("This file is missing a valid mork header");
69 content
= reader
.ReadToEnd ();
/// <summary>
/// Checks whether a line is a mork header and extracts the version it announces.
/// </summary>
/// <param name="header">First line of the file; may be null or empty.</param>
/// <param name="version">Receives the text captured from the header's v="..." attribute.</param>
/// <returns>true when <paramref name="header"/> matches the mork header pattern.</returns>
protected bool IsValid (string header, out string version)
{
    // NOTE(review): the value assigned on failure is not visible in this view;
    // an empty string is assumed here -- confirm against the original file.
    version = string.Empty;

    if (string.IsNullOrEmpty (header))
        return false;

    Match header_match = Regex.Match (header, @"<!-- <mdb:mork:z v=\""(?<version>(.*))\""/> -->");

    if (!header_match.Success)
        return false;

    version = header_match.Groups ["version"].Value;
    return true;
}
/// <summary>
/// Scans mork text from the top level and hands every recognised section to its parser:
/// "&lt;" followed two characters later by another "&lt;" starts a meta dictionary, a plain
/// "&lt;" a dictionary, "{" a table, "[" a row and "@$" a group. Line comments ("//") are skipped.
/// </summary>
/// <param name="content">Mork text to scan; null or empty text is ignored.</param>
protected void Read (string content)
{
    if (string.IsNullOrEmpty (content))
        return;

    int length = content.Length;
    int position = -1;

    while (++position < length) {
        char current = content [position];

        // Look-ahead characters; '\0' stands for "past the end" so that a marker
        // at the very end of the text cannot index outside the string.
        char next = (position + 1 < length ? content [position + 1] : '\0');
        char after_next = (position + 2 < length ? content [position + 2] : '\0');

        if (current == '/' && next == '/') {
            // Ignore comments. A comment on the last line has no trailing newline;
            // stop there instead of resetting the position and scanning again.
            int newline = content.IndexOf ('\n', position);
            if (newline < 0)
                break;

            position = newline;
        } else if (current == '<' && after_next == '<') {
            // Parse metadict information
            ParseMetaDict (Read (content, ref position, "<(", ")>"));
        } else if (current == '<') {
            // Parse dict information
            ParseDict (Read (content, ref position, "<(", ")>"));
        } else if (current == '{') {
            // Parse table information
            ParseTable (Read (content, ref position, "{", "}"));
        } else if (current == '[') {
            // Parse rows that are not part of a table
            ParseRows (Read (content, ref position, "[", "]"), null, null);
        } else if (current == '@' && next == '$') {
            // Parse groups
            ParseGroups (Read (content, ref position, "@$${", "@$$}"));
        }
    }
}
/// <summary>
/// Extracts the section that begins at <paramref name="position"/> and runs up to its end
/// marker. Every start marker found before the current end marker moves the search on to
/// the next end marker, so sections containing further start markers are returned whole.
/// </summary>
/// <param name="content">Text to extract from.</param>
/// <param name="position">Index where the section starts; on return, the index of the end marker.</param>
/// <param name="start">Marker that opens a section.</param>
/// <param name="end">Marker that closes a section.</param>
/// <returns>
/// The text from the initial position up to and including the first character of the end marker.
/// </returns>
protected string Read (string content, ref int position, string start, string end)
{
    int section_start = position;
    int next_open = position;

    while (true) {
        position = content.IndexOf (end, position + 1);
        next_open = content.IndexOf (start, next_open + 1);

        // Done once there is no further start marker, or it lies at or beyond the end marker
        if (next_open < 0 || next_open >= position)
            break;
    }

    int section_length = position - section_start + 1;
    return content.Substring (section_start, section_length);
}
/// <summary>
/// Stores every "id=value" pair of a dictionary section in <c>dicts</c>.
/// </summary>
/// <param name="dict">Dictionary section text, starting with "&lt;(" and ending with ")".</param>
protected virtual void ParseDict (string dict)
{
    Regex pair_pattern = new Regex (@"(?<id>[0-9A-Fa-f]+)\s*=(?<value>(.*))", RegexOptions.Compiled);

    // Strip the leading "<(" and the trailing ")", then join continued and wrapped lines
    string body = dict.Substring (2, dict.Length - 3).Replace ("\\\n", "").Replace ("\n", "");

    // Splitting by hand is a workaround: according to the original author a single
    // regex over the whole section fails on large amounts of data. Every ")(" boundary
    // becomes a newline so that each pair can be matched on its own.
    string[] pairs = Regex.Replace (body, @"\)\s*\(", "\n").Split ('\n');

    foreach (string pair in pairs) {
        Match m = pair_pattern.Match (pair);

        if (!m.Success)
            continue;

        dicts [m.Groups ["id"].Value] = m.Groups ["value"].Value;
    }
}
/// <summary>
/// Stores every "id=value" pair found in a meta dictionary section in <c>metadicts</c>.
/// </summary>
/// <param name="metadict">Meta dictionary section text.</param>
protected virtual void ParseMetaDict (string metadict)
{
    MatchCollection pairs = Regex.Matches (metadict, @"(?<id>[0-9A-Fa-f]+)=(?<value>[^()]+)", RegexOptions.Compiled);

    foreach (Match pair in pairs) {
        string id = pair.Groups ["id"].Value;
        metadicts [id] = pair.Groups ["value"].Value;
    }
}
/// <summary>
/// Reads the namespace and table id from a table section's header and passes the
/// rest of the section to <c>ParseRows</c>.
/// </summary>
/// <param name="table">Table section text, from its opening "{" to its closing "}".</param>
protected virtual void ParseTable (string table)
{
    // Rows start right after the first "}" and run up to the section's last character
    int body_start = table.IndexOf ('}') + 1;
    Match header = new Regex (regex_table, RegexOptions.Compiled).Match (table);

    string body = table.Substring (body_start, table.Length - body_start - 1);

    ParseRows (body, header.Result ("${ns}"), header.Result ("${tbl}"));
}
/// <summary>
/// Applies every row found in <paramref name="rows"/>: a row with the "-" action or
/// without cells is removed, any other row is added.
/// </summary>
/// <param name="rows">Text containing zero or more rows.</param>
/// <param name="ns">Namespace used for rows whose id does not carry one.</param>
/// <param name="table">Table the rows belong to; passed on to <c>AddRow</c>.</param>
protected virtual void ParseRows (string rows, string ns, string table)
{
    Regex row_pattern = new Regex (regex_row, RegexOptions.Compiled);
    MatchCollection found = row_pattern.Matches (Clean (rows));

    foreach (Match row in found) {
        // The row id has the form "id" or "id:namespace"
        string[] roid = row.Result ("${roid}").Split (':');
        string row_id = roid [0];
        string row_ns = (roid.Length > 1 ? roid [1] : ns);

        string cells = row.Result ("${cells}");
        bool is_removal = row.Result ("${action}") == "-" || cells == string.Empty;

        if (is_removal) {
            RemoveRow (row_id, row_ns);
        } else {
            AddRow (row_id, row_ns, table, cells);
        }
    }
}
/// <summary>
/// Parses the content of a group section by feeding it back into <c>Read</c>.
/// </summary>
/// <param name="groups">Group section text.</param>
protected virtual void ParseGroups (string groups)
{
    // Content starts right after the first "{@"; the final character of the section is dropped
    int body_start = groups.IndexOf ("{@") + 2;
    int body_length = groups.Length - body_start - 1;

    Read (groups.Substring (body_start, body_length));
}
/// <summary>
/// Removes every newline and space character from <paramref name="str"/>.
/// </summary>
/// <param name="str">Text to clean.</param>
/// <returns>The text without "\n" and " " characters.</returns>
protected string Clean (string str)
{
    string without_newlines = str.Replace ("\n", "");

    return without_newlines.Replace (" ", "");
}
/// <summary>
/// Resolves a namespace to the "^id" form used as key in the row storage.
/// </summary>
/// <param name="ns">
/// Either a key that already starts with "^" (returned unchanged) or a name that is
/// looked up among the values of the meta dictionary.
/// </param>
/// <returns>
/// The "^id" key, or null when <paramref name="ns"/> is null or cannot be resolved.
/// </returns>
public string ParseNamespace (string ns)
{
    // Rows outside of a table are parsed with a null namespace; treat that as unresolved
    // instead of failing with a NullReferenceException.
    if (ns == null)
        return null;

    if (ns.StartsWith ("^", System.StringComparison.Ordinal))
        return ns;

    foreach (DictionaryEntry entry in metadicts) {
        if ((entry.Value as string) == ns)
            return String.Format ("^{0}", entry.Key);
    }

    // NOTE(review): the fall-through value is not visible in this view; null is used
    // because the callers in this file test the result against null -- confirm.
    return null;
}
/// <summary>
/// Adds the cells of a row to the row storage. When the row already exists the new
/// cells are placed in front of the stored ones.
/// </summary>
/// <param name="id">Row id; null or empty ids are ignored.</param>
/// <param name="ns">Namespace of the row; rows whose namespace cannot be resolved are ignored.</param>
/// <param name="table">Table id remembered for the row the first time it is added; may be null.</param>
/// <param name="cells">Raw cell text; null or empty cells are ignored.</param>
public void AddRow (string id, string ns, string table, string cells)
{
    // Checked before resolving so this method never hands a null namespace to ParseNamespace
    if (string.IsNullOrEmpty (id) || ns == null || string.IsNullOrEmpty (cells))
        return;

    // A null table stays allowed on purpose: ParseRows passes null for rows outside of a table
    if (table == string.Empty)
        return;

    string ns2 = ParseNamespace (ns);

    // Hashtable keys must not be null, so an unresolved namespace cannot be stored
    if (string.IsNullOrEmpty (ns2))
        return;

    Hashtable ns_rows = rows [ns2] as Hashtable;
    if (ns_rows == null) {
        ns_rows = new Hashtable ();
        rows [ns2] = ns_rows;
    }

    string previous = ns_rows [id] as string;
    ns_rows [id] = (previous != null ? String.Concat (cells, previous) : cells);

    // NOTE(review): the statement guarded by this test is not visible in this view;
    // recording the table for the row is assumed because Compile reads tables [id] -- confirm.
    if (!tables.ContainsKey (id))
        tables [id] = table;
}
/// <summary>
/// Removes a row from the row storage. Nothing happens when the row, or its
/// namespace, is not stored.
/// </summary>
/// <param name="id">Row id; a null id is ignored.</param>
/// <param name="ns">Namespace of the row; an unresolvable namespace is ignored.</param>
public void RemoveRow (string id, string ns)
{
    // Hashtable keys must not be null, and a null namespace cannot be resolved
    if (id == null || ns == null)
        return;

    string ns2 = ParseNamespace (ns);
    if (ns2 == null)
        return;

    Hashtable ns_rows = rows [ns2] as Hashtable;
    if (ns_rows != null)
        ns_rows.Remove (id);
}
/// <summary>
/// Returns the raw cell text stored for a row.
/// </summary>
/// <param name="id">Row id.</param>
/// <param name="ns">Namespace of the row.</param>
/// <returns>
/// The stored cell text, or null when the namespace cannot be resolved, has no rows,
/// or does not contain the row.
/// </returns>
public string GetCells (string id, string ns)
{
    if (id == null || ns == null)
        return null;

    string ns2 = ParseNamespace (ns);
    if (ns2 == null)
        return null;

    // A resolved namespace may still have no rows; avoid dereferencing a missing table
    Hashtable ns_rows = rows [ns2] as Hashtable;

    return (ns_rows != null ? ns_rows [id] as string : null);
}
/// <summary>
/// Builds a lookup of column name to decoded value for one row. The entry "table"
/// holds whatever <c>tables</c> stores for the row id.
/// </summary>
/// <param name="id">Row id.</param>
/// <param name="ns">Namespace of the row.</param>
/// <returns>The compiled row, or null when the row does not exist.</returns>
public Hashtable Compile (string id, string ns)
{
    if (id == null || ns == null)
        return null;

    string ns2 = ParseNamespace (ns);
    Hashtable ns_rows = (ns2 != null ? rows [ns2] as Hashtable : null);

    if (ns_rows == null || !ns_rows.ContainsKey (id))
        return null;

    Hashtable tbl = new Hashtable ();
    string cells = ns_rows [id] as string;

    if (cells != null) {
        Regex reg = new Regex (regex_cell, RegexOptions.Compiled);

        // The Encoding property looks up the encoding on every access; resolve it once per row
        System.Text.Encoding encoding = Encoding;

        foreach (Match m in reg.Matches (cells)) {
            // A column id without a meta dictionary entry would give a null key, which
            // Hashtable rejects; such cells are skipped.
            object column = metadicts [m.Groups ["key"].Value];
            if (column == null)
                continue;

            // A cell either references a dictionary entry (pvalue) or carries a literal value
            string pvalue = m.Groups ["pvalue"].Value;
            string value = (pvalue != string.Empty ? dicts [pvalue] as string : m.Groups ["value"].Value);

            tbl [column] = Decode (value, encoding);
        }
    }

    tbl ["table"] = tables [id];

    return tbl;
}
/// <summary>
/// Tells whether a row is stored in the given namespace.
/// </summary>
/// <param name="id">Row id.</param>
/// <param name="ns">Namespace of the row, either as "^id" key or as a name to resolve.</param>
/// <returns>
/// true when the namespace can be resolved, has rows and contains <paramref name="id"/>.
/// </returns>
public bool Exists (string id, string ns)
{
    if (id == null || ns == null)
        return false;

    string ns2 = ParseNamespace (ns);
    if (ns2 == null)
        return false;

    // Look the rows up with the resolved key (the storage is keyed by "^id"), and
    // tolerate namespaces that have no rows yet.
    Hashtable ns_rows = rows [ns2] as Hashtable;

    return (ns_rows != null && ns_rows.ContainsKey (id));
}
266 public int GetRowCount (string ns)
268 string ns2 = ParseNamespace (ns);
270 if (ns2 == null || rows [ns2] == null)
273 return (rows [ns2] as Hashtable).Count;
276 public int GetRowCount (string ns, string table)
279 string ns2 = ParseNamespace (ns);
281 if (ns2 == null || rows [ns2] == null)
284 foreach (string id in (rows [ns2] as Hashtable).Keys) {
285 if ((string) tables [id] == table)
292 public IEnumerator GetEnumerator ()
294 string ns = ParseNamespace (EnumNamespace);
296 if (ns == null || (rows [ns] as Hashtable) == null || Empty)
299 return (rows [ns] as Hashtable).Keys.GetEnumerator ();
308 mork_version = string.Empty;
/// <summary>
/// Converts one or two byte values to a string in <paramref name="to_encoding"/>.
/// A single byte is read as UTF-7, a pair of bytes as UTF-8.
/// </summary>
/// <param name="char1">First byte value.</param>
/// <param name="char2">Second byte value, or -1 when there is only one byte.</param>
/// <param name="to_encoding">Encoding the bytes are converted to and decoded with.</param>
/// <returns>The decoded text.</returns>
public static string Convert (int char1, int char2, System.Text.Encoding to_encoding)
{
    // NOTE(review): the branch condition is not visible in this view; "char2 == -1" is
    // assumed because Decode passes -1 for single-byte escapes -- confirm.
    bool single_byte = (char2 == -1);

    System.Text.Encoding from;
    byte[] bytes;

    if (single_byte) {
        from = System.Text.Encoding.UTF7;
        bytes = new byte[] { System.Convert.ToByte (char1) };
    } else {
        from = System.Text.Encoding.UTF8;
        bytes = new byte[] { System.Convert.ToByte (char1), System.Convert.ToByte (char2) };
    }

    byte[] converted = System.Text.Encoding.Convert (from, to_encoding, bytes);

    return to_encoding.GetString (converted);
}
/// <summary>
/// Replaces "$XX" escapes (two upper-case hex digits) in <paramref name="str"/> with the
/// characters they encode. Two consecutive escapes are converted together as a byte
/// pair, a lone escape as a single byte.
/// </summary>
/// <param name="str">Text that may contain escapes.</param>
/// <param name="to_encoding">Encoding handed to <c>Convert</c> for every escape.</param>
/// <returns>
/// The decoded text; <paramref name="str"/> is returned unchanged when it is null, empty
/// or contains no "$", or when <paramref name="to_encoding"/> is null.
/// </returns>
public static string Decode (string str, System.Text.Encoding to_encoding)
{
    if (string.IsNullOrEmpty (str) || to_encoding == null || str.IndexOf ('$') < 0)
        return str;

    // The escapes are collected from the text as passed in; the replacements below
    // build up the result without affecting this collection.
    MatchCollection escapes = Regex.Matches (str, @"\$(?<1>[0-9A-F]{2})\$(?<2>[0-9A-F]{2})|\$(?<3>[0-9A-F]{2})");

    foreach (Match escape in escapes) {
        string first = escape.Groups [1].Value;
        string second = escape.Groups [2].Value;
        string single = escape.Groups [3].Value;

        if (first != string.Empty) {
            // Byte pair: "$XX$YY"
            string pair_escape = "$" + first + "$" + second;
            str = str.Replace (pair_escape,
                Convert (Thunderbird.Hex2Dec (first), Thunderbird.Hex2Dec (second), to_encoding));
        } else {
            // Single byte: "$XX"
            string single_escape = "$" + single;
            str = str.Replace (single_escape,
                Convert (Thunderbird.Hex2Dec (single), -1, to_encoding));
        }
    }

    return str;
}
351 foreach (Hashtable r in rows.Values)
/// <summary>
/// Gets or sets the namespace stored in <c>enum_namespace</c>, which
/// <c>GetEnumerator</c> resolves to pick the rows it enumerates.
/// </summary>
public string EnumNamespace
{
    get
    {
        return this.enum_namespace;
    }
    set
    {
        this.enum_namespace = value;
    }
}
/// <summary>
/// Gets the path stored in <c>mork_file</c>.
/// </summary>
public string Filename
{
    get
    {
        return this.mork_file;
    }
}
/// <summary>
/// Gets the version string stored in <c>mork_version</c>.
/// </summary>
public string Version
{
    get
    {
        return this.mork_version;
    }
}
// There will always exist an item with id 1 in namespace 80, which means
// that when there are fewer than two items in the database, it's empty
374 get { return (rows.Count > 1 ? false : true); }
/// <summary>
/// Gets the encoding named by the meta dictionary entry "f", falling back to
/// "iso-8859-1" when that entry is missing or does not name a usable encoding.
/// </summary>
public System.Text.Encoding Encoding {
    get {
        System.Text.Encoding encoding = null;
        string charset = metadicts ["f"] as string;

        // Only ask the framework when a name is present; a missing entry is the
        // normal case for some files and should not have to raise an exception.
        if (!string.IsNullOrEmpty (charset)) {
            try {
                encoding = System.Text.Encoding.GetEncoding (charset);
            } catch (System.ArgumentException) {
                // Unknown encoding name: use the fallback below
                encoding = null;
            } catch (System.NotSupportedException) {
                // Encoding not available on this platform: use the fallback below
                encoding = null;
            }
        }

        if (encoding == null)
            encoding = System.Text.Encoding.GetEncoding ("iso-8859-1");

        return encoding;
    }
}
/// <summary>
/// Thrown when a file does not start with a valid mork header.
/// </summary>
public class InvalidMorkDatabaseException : System.Exception {

    /// <summary>Creates the exception with the default message.</summary>
    public InvalidMorkDatabaseException () : base ()
    {
    }

    /// <summary>Creates the exception with a message describing the problem.</summary>
    /// <param name="message">Description of what is wrong with the file.</param>
    public InvalidMorkDatabaseException (string message) : base (message)
    {
    }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Description of what is wrong with the file.</param>
    /// <param name="inner">Exception that led to this one.</param>
    public InvalidMorkDatabaseException (string message, System.Exception inner) : base (message, inner)
    {
    }
}