2 // Mork.cs: A parser for mork files (used by software such as Firefox and Thunderbird)
4 // Copyright (C) 2006 Pierre Östlund
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 using System
.Collections
;
31 using System
.Text
.RegularExpressions
;
35 public class MorkDatabase
: IEnumerable
{
// Path of the mork file handed to the constructor (exposed through Filename).
36 protected string mork_file
;
// Namespace read by GetEnumerator through the EnumNamespace property.
37 protected string enum_namespace
;
// Version string filled in by IsValid from the first line of the file.
38 protected string mork_version
;
// Dictionary entries stored by ParseDict: id -> value.
40 protected Hashtable dicts
;
// Meta-dictionary entries stored by ParseMetaDict: id -> value. Looked up for
// cell keys (Compile), namespaces (ParseNamespace) and the "f" entry (Encoding).
41 protected Hashtable metadicts
;
// Namespace -> Hashtable that maps a row id to its raw cell text (see AddRow).
42 protected Hashtable rows
;
// Keyed by row id; read by Compile and GetRowCount to find a row's table.
43 protected Hashtable tables
;
// Pattern for one row: optional action prefix, row id ("roid") and its cells.
45 protected string regex_row
= @"(?<action>[-!+]?)\[(-|)(?<roid>[0-9A-Za-z:\^]+)(?<cells>(?>[^\[\]]+)?)\]";
// Pattern for one cell: a key followed by either a dictionary reference
// ("pvalue") or an inline hexadecimal value ("value").
46 protected string regex_cell
= @"\^(?<key>[0-9A-Fa-f]+)(\^(?<pvalue>[0-9A-Fa-f]+)|=(?<value>[0-9A-Fa-f]+))";
// Pattern for a table header: captures the namespace ("ns") and table ("tbl").
47 protected string regex_table
= @"{.*?:(?<ns>[0-9A-Fa-f\^]+) {\(k\^(?<tbl>[0-9A-Fa-f]+):c\)";
// Creates an empty database bound to mork_file. Nothing is read from disk
// here; the four lookup tables simply start out empty.
public MorkDatabase (string mork_file)
{
	dicts = new Hashtable ();
	metadicts = new Hashtable ();
	rows = new Hashtable ();
	tables = new Hashtable ();

	// The parameter shadows the field, so the field needs the explicit "this."
	this.mork_file = mork_file;
}
61 StreamReader reader
= new StreamReader (mork_file
);;
63 // Check if this is a mork file and save database version if it is. We assume the first line will tell us this.
64 if (!IsValid (reader
.ReadLine (), out mork_version
)) {
66 throw new InvalidMorkDatabaseException ("This file is missing a valid mork header");
69 content
= reader
.ReadToEnd ();
// Tests header against the mork header pattern and hands the text captured by
// the "version" group back through the version out-parameter.
// A null or empty header is checked for before the pattern is applied.
// NOTE(review): the return statements and the initial assignment of version
// are not visible in this excerpt - confirm them before relying on the value
// of version after a failed check.
76 protected bool IsValid (string header
, out string version
)
79 Regex reg
= new Regex (@"<!-- <mdb:mork:z v=\""(?<version>(.*))\""/> -->");
81 if (header == null || header == string.Empty)
84 Match m = reg.Match (header);
88 version = m.Result ("${version}
");
// Walks the raw mork text once and hands every top-level construct to its parser:
//   "//"            comment, skipped up to the end of the line
//   "<" x "<"       meta-dictionary (a second '<' two characters later)
//   "<"             dictionary
//   "{"             table
//   "["             free-standing row (no namespace or table known)
//   "@$"            transaction group, delimited by "@$${" and "@$$}"
// Any other character is ignored. The helpers move position to the end of the
// construct they consumed, so the loop resumes right after it.
protected void Read (string content)
{
	int position = -1;

	while (++position < content.Length) {
		char current = content [position];
		bool has_next = position + 1 < content.Length;

		if (current == '/' && has_next && content [position + 1] == '/') {
			// Ignore comments. A comment on the very last line has no trailing
			// newline; stop there instead of restarting from the beginning.
			position = content.IndexOf ('\n', position);
			if (position < 0)
				break;
		} else if (current == '<' && position + 2 < content.Length && content [position + 2] == '<') {
			// Parse metadict information
			ParseMetaDict (FindStartIndex (content, ref position, "<(", ")>"), position, content);
		} else if (current == '<') {
			// Parse dict information
			ParseDict (FindStartIndex (content, ref position, "<(", ")>"), position, content);
		} else if (current == '{') {
			// Parse table information
			ParseTable (Read (content, ref position, "{", "}"));
		} else if (current == '[') {
			// Parse rows that live outside of any table
			ParseRows (Read (content, ref position, "[", "]"), null, null);
		} else if (current == '@' && has_next && content [position + 1] == '$') {
			// Parse a transaction group
			ParseGroups (Read (content, ref position, "@$${", "@$$}"));
		}
	}
}
// Returns the construct that begins at position, from its first character up to
// and including the first character of its closing token. On return, position
// points at that closing token. The scan itself is the one FindStartIndex does.
protected string Read (string content, ref int position, string start, string end)
{
	int first = FindStartIndex (content, ref position, start, end);
	int length = position - first + 1;

	return content.Substring (first, length);
}
// Finds the end of the construct that begins at position without allocating a
// substring: the return value is the index the scan started from and position
// is moved to the closing token. ParseDict and ParseMetaDict use the two
// indexes directly, which avoids a large number of string allocations.
//
// Each round advances to the next closing token and to the next opening token;
// the scan stops once no further opening token exists or the next opening
// token no longer lies before the current closing token.
protected int FindStartIndex (string content, ref int position, string start, string end)
{
	int origin = position;
	int next_open = position;

	while (true) {
		position = content.IndexOf (end, position + 1);
		next_open = content.IndexOf (start, next_open + 1);

		if (next_open < 0 || next_open >= position)
			break;
	}

	return origin;
}
// Parses the dictionary found in dict between the indexes start and end and
// stores every entry in dicts, keyed by the text captured as "id".
// The section is first stripped of line continuations and newlines, then the
// separator between two entries is turned into a newline so the text can be
// split and each entry matched on its own.
// NOTE(review): any statements between the visible lines of the loop body are
// not shown in this excerpt - confirm how a non-matching entry is handled.
147 protected virtual void ParseDict (int start, int end, string dict)
149 Regex reg = new Regex (@"(?<id
>[0-9A
-Fa
-f
]+)\s
*=(?<value>(.*))", RegexOptions.Compiled);
151 // This is sooo lame that, but it's an easy solution that works. It seems like regex fails
152 // here when dealing with big amounts of data.
153 foreach (string t in Regex.Replace (dict.Substring (start+2,(end-start)-3).Replace ("\\\n", "").
154 Replace ("\n", ""), @"\
)\s
*\
(", "\n").Split ('\n')) {
156 Match m = reg.Match (t);
158 dicts [m.Result ("${id}
")] = m.Result ("${value}
");
// Extracts every "id=value" pair from the meta-dictionary text that spans
// content[start..end] (both inclusive) and stores it in metadicts. A later pair
// with the same id overwrites the earlier one.
protected virtual void ParseMetaDict (int start, int end, string content)
{
	string section = content.Substring (start, end - start + 1);

	// The static Regex API keeps parsed patterns in an internal cache, so the
	// pattern is no longer rebuilt and recompiled on every single call.
	foreach (Match m in Regex.Matches (section, @"(?<id>[0-9A-Fa-f]+)=(?<value>[^()]+)"))
		metadicts [m.Groups ["id"].Value] = m.Groups ["value"].Value;
}
// Parses one table: the header is matched with regex_table to obtain the
// namespace and the table id, and everything after the first '}' (minus the
// final character) is passed on to ParseRows as the row data.
// A header that does not match regex_table makes Match.Result throw, exactly
// as before.
protected virtual void ParseTable (string table)
{
	int start = table.IndexOf ('}') + 1;

	// Static Regex.Match reuses the cached pattern instead of compiling a new
	// Regex object for every table.
	Match m = Regex.Match (table, regex_table);

	ParseRows (table.Substring (start, table.Length - start - 1), m.Result ("${ns}"), m.Result ("${tbl}"));
}
// Applies every row found in rows to the database. ns and table are the
// namespace and table the rows belong to (both may be null for rows that live
// outside of a table). A row is removed when it carries the "-" action or has
// no cells; otherwise it is added.
protected virtual void ParseRows (string rows, string ns, string table)
{
	// Static Regex.Matches reuses the cached pattern instead of compiling a new
	// Regex object for every call.
	foreach (Match m in Regex.Matches (Clean (rows), regex_row)) {
		// roid is either "id" or "id:namespace"; an explicit namespace wins over ns
		string[] roid = m.Groups ["roid"].Value.Split (':');
		string row_ns = (roid.Length > 1 ? roid [1] : ns);
		string cells = m.Groups ["cells"].Value;

		if (m.Groups ["action"].Value == "-" || cells == string.Empty)
			RemoveRow (roid [0], row_ns);
		else
			AddRow (roid [0], row_ns, table, cells);
	}
}
// Handles one transaction group: the text that follows the first "{@" is kept,
// with its final character dropped.
// NOTE(review): what is done with the trimmed text is not visible in this
// excerpt - presumably it is parsed like regular content; confirm.
193 protected virtual void ParseGroups (string groups)
195 int start = groups.IndexOf ("{@")+2;
196 groups =groups.Substring (start, groups.Length-start-1);
// Returns str with every newline and every space character removed.
protected string Clean (string str)
{
	string without_newlines = str.Replace ("\n", "");

	return without_newlines.Replace (" ", "");
}
// Resolves a namespace given by name to its "^id" form: metadicts is searched
// for an entry whose value equals ns and the matching key is returned with a
// "^" prefix. Null/empty input and input that already starts with "^" are
// tested for first.
// NOTE(review): the values returned for those early cases and for a name that
// is not found are not visible in this excerpt - confirm before relying on them.
205 public string ParseNamespace (string ns)
207 if (ns == null || ns == string.Empty)
209 if (ns.StartsWith ("^
"))
212 foreach (string key in metadicts.Keys)
213 if ((metadicts [key] as string) == ns)
214 return String.Format ("^{0}
", key);
// Stores the cells of row id in namespace ns. Nothing happens when id, the
// resolved namespace, table or cells is an empty string. Cells of a row that
// already exists are kept: the new cells are put in front of the stored ones.
// The first table a row id is seen in is remembered in tables.
public void AddRow (string id, string ns, string table, string cells)
{
	string ns2 = ParseNamespace (ns);

	bool incomplete = id == string.Empty || ns2 == string.Empty
		|| table == string.Empty || cells == string.Empty;
	if (incomplete)
		return;

	if (!rows.ContainsKey (ns2))
		rows [ns2] = new Hashtable ();

	Hashtable ns_rows = rows [ns2] as Hashtable;
	string merged = cells;

	if (Exists (id, ns2))
		merged = String.Concat (cells, GetCells (id, ns2));

	ns_rows [id] = merged;

	if (!tables.ContainsKey (id))
		tables [id] = table;
}
// Removes row id from the row table of namespace ns (resolved through
// ParseNamespace). A namespace that has no entry in rows is tested for first.
// NOTE(review): whether anything else is cleaned up after the removal is not
// visible in this excerpt.
235 public void RemoveRow (string id, string ns)
237 string ns2 = ParseNamespace (ns);
239 if (!rows.ContainsKey (ns2))
242 (rows [ns2] as Hashtable).Remove (id);
// Returns the raw cell text stored for row id in namespace ns, or null when the
// namespace cannot be resolved, holds no rows, or does not contain the row.
public string GetCells (string id, string ns)
{
	string ns2 = ParseNamespace (ns);
	if (ns2 == null)
		return null;

	// A namespace nothing was ever added to has no row table; treat that as
	// "no such row" instead of dereferencing null.
	Hashtable ns_rows = rows [ns2] as Hashtable;

	return (ns_rows != null ? ns_rows [id] as string : null);
}
// Builds a column -> value table for row id in namespace ns, or returns null
// when the row does not exist. Every cell key is translated to its column
// through metadicts; a cell value is either looked up in dicts (for a "pvalue"
// reference) or taken literally, and is then run through Decode. The entry
// "table" holds whatever tables has recorded for the row id.
public Hashtable Compile (string id, string ns)
{
	string ns2 = ParseNamespace (ns);

	if (!Exists (id, ns2))
		return null;

	Hashtable tbl = new Hashtable ();

	// The Encoding property performs a lookup on every access and does not
	// change while the row is compiled, so read it once.
	System.Text.Encoding encoding = Encoding;

	// Static Regex.Matches reuses the cached pattern instead of compiling a new
	// Regex object for every row.
	foreach (Match m in Regex.Matches (GetCells (id, ns2), regex_cell)) {
		string pvalue = m.Groups ["pvalue"].Value;
		string value = (pvalue != string.Empty ? (string) dicts [pvalue] : m.Groups ["value"].Value);

		// A key without a meta-dictionary entry has no column to file the value
		// under (and a null key would make the Hashtable throw), so skip it.
		object column = metadicts [m.Groups ["key"].Value];
		if (column == null)
			continue;

		tbl [column] = Decode (value, encoding);
	}

	tbl ["table"] = tables [id];

	return tbl;
}
// Tells whether row id is stored in namespace ns. The namespace may be given
// by name or in its "^id" form; it is resolved through ParseNamespace first.
public bool Exists (string id, string ns)
{
	string ns2 = ParseNamespace (ns);
	if (ns2 == null)
		return false;

	// Look the row table up under the resolved namespace (rows is keyed by it,
	// not by the caller's spelling) and cope with a namespace that has no rows.
	Hashtable ns_rows = rows [ns2] as Hashtable;

	return (ns_rows != null && ns_rows.ContainsKey (id));
}
// Returns the number of rows stored in namespace ns (resolved through
// ParseNamespace).
// NOTE(review): the value returned when the namespace is unknown or holds no
// rows is not visible in this excerpt.
283 public int GetRowCount (string ns)
285 string ns2 = ParseNamespace (ns);
287 if (ns2 == null || rows [ns2] == null)
290 return (rows [ns2] as Hashtable).Count;
// Looks at every row id of namespace ns and compares the table recorded for it
// in tables with table.
// NOTE(review): the counting and return statements are not visible in this
// excerpt - presumably the matching rows are counted; confirm.
293 public int GetRowCount (string ns, string table)
296 string ns2 = ParseNamespace (ns);
298 if (ns2 == null || rows [ns2] == null)
301 foreach (string id in (rows [ns2] as Hashtable).Keys) {
302 if ((string) tables [id] == table)
// Enumerates the row ids stored in the namespace selected by EnumNamespace.
// NOTE(review): what is returned when the namespace is unresolved, has no rows
// or the database is Empty is not visible in this excerpt - confirm that
// callers using foreach can cope with it.
309 public IEnumerator GetEnumerator ()
311 string ns = ParseNamespace (EnumNamespace);
313 if (ns == null || (rows [ns] as Hashtable) == null || Empty)
316 return (rows [ns] as Hashtable).Keys.GetEnumerator ();
325 mork_version = string.Empty;
// Converts one or two byte values into a string in to_encoding. The bytes are
// interpreted either as a single UTF-7 byte (char1 only) or as a two-byte
// UTF-8 sequence (char1 followed by char2) before being converted.
// NOTE(review): the condition that chooses between the two cases is not visible
// in this excerpt; Decode passes -1 as char2 for the single-byte case.
328 public static string Convert (int char1, int char2, System.Text.Encoding to_encoding)
331 System.Text.Encoding from;
334 from = System.Text.Encoding.UTF7;
335 bytes = new byte[] { System.Convert.ToByte (char1) };
337 from = System.Text.Encoding.UTF8;
338 bytes = new byte[] { System.Convert.ToByte (char1), System.Convert.ToByte (char2) };
341 return to_encoding.GetString (System.Text.Encoding.Convert (from, to_encoding, bytes));
// Replaces the "$XX" escapes in str (XX being two upper-case hexadecimal
// digits) with the characters they stand for, converted to to_encoding. Two
// escapes directly after each other are decoded together as one two-byte
// sequence, a lone escape as a single byte (see Convert). The input is
// returned untouched when it is null, empty, contains no '$', or when
// to_encoding is null.
public static string Decode (string str, System.Text.Encoding to_encoding)
{
	if (str == null || str == string.Empty || to_encoding == null || str.IndexOf ('$') == -1)
		return str;

	// Every escape is decoded at the position it was matched and the result is
	// assembled left to right. Replacing text globally for each match would let
	// an earlier replacement destroy a later match ("$42 $41$42") or decode a
	// '$' that was itself the product of decoding a second time.
	System.Text.StringBuilder decoded = new System.Text.StringBuilder (str.Length);
	int copied = 0;

	foreach (Match m in Regex.Matches (str, @"\$(?<1>[0-9A-F]{2})\$(?<2>[0-9A-F]{2})|\$(?<3>[0-9A-F]{2})")) {
		// Literal text between the previous escape and this one
		decoded.Append (str, copied, m.Index - copied);

		if (m.Groups [1].Success) {
			decoded.Append (Convert (Thunderbird.Hex2Dec (m.Groups [1].Value),
				Thunderbird.Hex2Dec (m.Groups [2].Value), to_encoding));
		} else {
			decoded.Append (Convert (Thunderbird.Hex2Dec (m.Groups [3].Value), -1, to_encoding));
		}

		copied = m.Index + m.Length;
	}

	// Literal text after the last escape
	decoded.Append (str, copied, str.Length - copied);

	return decoded.ToString ();
}
368 foreach (Hashtable r in rows.Values)
// Namespace whose row ids GetEnumerator hands out.
public string EnumNamespace {
	get {
		return this.enum_namespace;
	}
	set {
		this.enum_namespace = value;
	}
}
// Path of the mork file this database was created for (read-only).
public string Filename {
	get {
		return this.mork_file;
	}
}
// Version string taken from the header of the mork file (read-only).
public string Version {
	get {
		return this.mork_version;
	}
}
388 // There will always exist an item with id 1 in namespace 80, which means
389 // that when there are less than two items in the database, it's empty
391 get { return (rows.Count > 1 ? false : true); }
// Text encoding of the database: looked up by the name stored under "f" in the
// meta-dictionary; a second assignment selects ISO 8859-1 instead.
// NOTE(review): the statements that separate the two assignments are not
// visible in this excerpt - presumably a try/catch fallback for a missing or
// unknown encoding name; confirm.
394 public System.Text.Encoding Encoding {
396 System.Text.Encoding encoding;
399 encoding = System.Text.Encoding.GetEncoding ((string) metadicts ["f
"]);
401 encoding = System.Text.Encoding.GetEncoding ("iso
-8859-1");
// Thrown when a file cannot be used as a mork database, e.g. because its first
// line is not a valid mork header.
public class InvalidMorkDatabaseException : System.Exception {

	// Creates the exception with the default message.
	public InvalidMorkDatabaseException ()
	{
	}

	// Creates the exception with a message describing the problem.
	public InvalidMorkDatabaseException (string message) : base (message)
	{
	}

	// Creates the exception with a message and the exception that caused it,
	// so the original failure is not lost when it is wrapped.
	public InvalidMorkDatabaseException (string message, System.Exception inner) : base (message, inner)
	{
	}
}