Some more fixes wrt child-indexables. Namely, fix proper handling of child indexables...
[beagle.git] / Util / StringFu.cs
blob86de503afde6d08aba0015a1025e67510270a5b9
1 //
2 // StringFu.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.Globalization;
30 using System.IO;
31 using System.Text;
32 using System.Xml;
34 using Mono.Unix;
36 namespace Beagle.Util {
38 public class StringFu {
// StringFu is used as a static class: the private constructor keeps
// callers from creating instances.
private StringFu ()
{
}

public const string UnindexedNamespace = "_unindexed:";

// Format used by DateTimeToString: 14 digits, no timezone.
private const string TimeFormat = "yyyyMMddHHmmss";

// FIXME: Fix all the UTC and timezone hack when switching to gmcs
// Same as TimeFormat, followed by a two-digit signed hour offset.
private const string LocalTimeFormat = "yyyyMMddHHmmsszz";
// Formats a timestamp as a 14-digit "yyyyMMddHHmmss" string.
//
// The invariant culture is passed explicitly so that the calendar and
// digits never depend on the user's locale.  StringToDateTime parses
// with the invariant culture, so formatting with the current culture
// could produce strings that do not round-trip.
static public string DateTimeToString (DateTime dt)
{
	return dt.ToString (TimeFormat, CultureInfo.InvariantCulture);
}
// Formats the year and month of a timestamp as six digits ("yyyyMM").
// The invariant culture keeps the result independent of the user's
// locale and calendar.
static public string DateTimeToYearMonthString (DateTime dt)
{
	return dt.ToString ("yyyyMM", CultureInfo.InvariantCulture);
}
// Formats the day of the month of a timestamp as two digits ("dd").
// The invariant culture keeps the result independent of the user's
// locale and calendar.
static public string DateTimeToDayString (DateTime dt)
{
	return dt.ToString ("dd", CultureInfo.InvariantCulture);
}
// Parses a timestamp string in the "yyyyMMddHHmmss" layout.
// A null or empty string yields a default-constructed DateTime.
//
// A "+00" offset is appended before parsing with LocalTimeFormat.
// Without an explicit offset, ParseExact would interpret the value in
// the local timezone (20061107173446 used to come back as
// 2006-11-07T17:34:46-05:00 even though it is stored in UTC).
static public DateTime StringToDateTime (string str)
{
	if (str != null && str != "") {
		string with_offset = str + "+00";
		return DateTime.ParseExact (with_offset,
					    LocalTimeFormat,
					    CultureInfo.InvariantCulture);
	}

	return new DateTime ();
}
// Builds a short, translated description of a timestamp such as
// "Today, 11:05 AM", "Yesterday, 3:10 PM", "Aug 9, 11:05 AM" or
// "Aug 9, 2000, 11:05 AM", depending on how far dt lies from today.
static public string DateTimeToFuzzy (DateTime dt)
{
	DateTime midnight = DateTime.Today;
	double days_before_today = (midnight - dt).TotalDays;

	string date_part;
	if (days_before_today <= 0) {
		// At or after midnight today.
		date_part = Catalog.GetString ("Today");
	} else if (days_before_today < 1) {
		// Less than 24 hours before midnight today.
		date_part = Catalog.GetString ("Yesterday");
	} else if (midnight.Year == dt.Year) {
		/* Translators: Example output: Aug 9 */
		date_part = dt.ToString (Catalog.GetString ("MMM d"));
	} else {
		/* Translators: Example output: Aug 9, 2000 */
		date_part = dt.ToString (Catalog.GetString ("MMM d, yyyy"));
	}

	/* Translators: Example output: 11:05 AM (note h = 12-hour time) */
	string time_part = dt.ToString (Catalog.GetString ("h:mm tt"));

	if (date_part == null)
		return time_part;
	if (time_part == null)
		return date_part;

	/* Translators: {0} is a date (e.g. 'Today' or 'Apr 23'), {1} is the time */
	return String.Format (Catalog.GetString ("{0}, {1}"), date_part, time_part);
}
// Builds a translated, human-readable description of a timestamp
// relative to the current time: "Today, ...", "Yesterday, ...",
// "N days ago, ..." for dates up to five days back in the current
// year, the month and day for other dates in the current year, and a
// full date including the year otherwise.
public static string DateTimeToPrettyString (DateTime date)
{
	DateTime now = DateTime.Now;
	string short_time = date.ToShortTimeString ();

	if (date.Year != now.Year) {
		/* Translators: Example output: March 23 2001, 10:04 AM */
		return date.ToString (Catalog.GetString ("MMMM d yyyy, h:mm tt"));
	}

	// Only meaningful because both dates are in the same year.
	int days_ago = now.DayOfYear - date.DayOfYear;

	if (days_ago == 0) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Today, {0}"), short_time);
	}

	if (days_ago == 1) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Yesterday, {0}"), short_time);
	}

	if (days_ago > 0 && days_ago < 6) {
		/* To translators: {0} is the number of days that have passed, {1} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("{0} days ago, {1}"),
				      days_ago,
				      short_time);
	}

	/* Translators: Example output: January 3, 3:45 PM */
	return date.ToString (Catalog.GetString ("MMMM d, h:mm tt"));
}
// Describes the time elapsed between start_time and end_time as a
// translated string such as "2 hours, 5 minutes", "3 hours" or
// "12 minutes".  Returns an empty string when the duration has
// neither a positive hour nor a positive minute component.
public static string DurationToPrettyString (DateTime end_time, DateTime start_time)
{
	TimeSpan span = end_time - start_time;

	// TimeSpan.Hours is only the hour component (0-23), so whole
	// days would be silently dropped; TotalHours keeps them.
	int hours = (int) span.TotalHours;
	int minutes = span.Minutes;

	string span_str = "";

	if (hours > 0) {
		span_str = String.Format (Catalog.GetPluralString ("{0} hour", "{0} hours", hours), hours);

		if (minutes > 0)
			span_str += ", ";
	}

	if (minutes > 0) {
		span_str += String.Format (Catalog.GetPluralString ("{0} minute", "{0} minutes", minutes), minutes);
	}

	return span_str;
}
// Renders a file size as a translated string: plain bytes below 1024,
// kilobytes with one decimal below one megabyte, and megabytes with
// one decimal otherwise.  Negative sizes yield "*BadLength*".
static public string FileLengthToString (long len)
{
	const long kilobyte = 1024;
	const long megabyte = 1024*1024;

	if (len < 0)
		return "*BadLength*";

	if (len >= megabyte) {
		/* Translators: {0} is a file size in megabytes */
		return String.Format (Catalog.GetString ("{0:0.0} MB"), len/(double)megabyte);
	}

	if (len >= kilobyte) {
		/* Translators: {0} is a file size in kilobytes */
		return String.Format (Catalog.GetString ("{0:0.0} KB"), len/(double)kilobyte);
	}

	/* Translators: {0} is a file size in bytes */
	return String.Format (Catalog.GetString ("{0} bytes"), len);
}
// Splits a string into space-separated pieces:
// (1) every run of characters that are neither letters nor digits
//     collapses into a single space;
// (2) a space is inserted at lowercase-to-uppercase transitions
//     ("FooBar" becomes "Foo Bar") and at transitions between
//     letters and digits ("cvs2svn" becomes "cvs 2 svn").
// An uppercase letter followed by a lowercase one is not split.
// The result never starts with a space, but it may end with one.
static public string FuzzyDivide (string line)
{
	// Slightly larger than the input to leave room for inserted spaces.
	StringBuilder result = new StringBuilder (line.Length + 4);

	// +1 = uppercase, -1 = lowercase, 0 = anything else.
	int previous_kind = 0;

	// Starts out true so that no leading space is emitted.
	bool ends_with_space = true;

	foreach (char c in line) {
		bool is_word_char = Char.IsLetterOrDigit (c);

		int kind = 0;
		if (is_word_char) {
			if (Char.IsUpper (c))
				kind = +1;
			else if (Char.IsLower (c))
				kind = -1;
		}

		// Separators always break; word characters break when the
		// kind changes, except going from upper- to lowercase.
		bool upper_to_lower = (kind == -1 && previous_kind == +1);
		bool boundary = !is_word_char || (kind != previous_kind && !upper_to_lower);

		if (boundary && !ends_with_space) {
			result.Append (' ');
			ends_with_space = true;
		}

		if (is_word_char) {
			result.Append (c);
			ends_with_space = false;
		}

		previous_kind = kind;
	}

	return result.ToString ();
}
// Applies FuzzyDivide to a URL after dropping everything up to and
// including the first "://".
//
// When the string contains no "://" the whole string is divided.
// Previously IndexOf returned -1 in that case and Substring (2)
// silently discarded the first two characters, or threw on inputs
// shorter than two characters.
public static string UrlFuzzyDivide (string url)
{
	int protocol_index = url.IndexOf ("://");
	if (protocol_index == -1)
		return FuzzyDivide (url);

	return FuzzyDivide (url.Substring (protocol_index + 3));
}
// Match strings against patterns that are allowed to contain
// glob-style * wildcards.  A '*' matches any run of characters,
// including an empty one; every other character must match exactly
// (ordinal comparison).  Returns false if either argument is null.
//
// The scan is iterative: whenever a literal fails to match, we go
// back to the most recent '*' and let it absorb one more character.
// This replaces a recursive version that took exponential time on
// patterns with several stars and never let a '*' match the empty
// end of the string (so "foo*" did not match "foo").
static public bool GlobMatch (string pattern, string str)
{
	if (pattern == null || str == null)
		return false;

	int p = 0;		// position in pattern
	int s = 0;		// position in str
	int star = -1;		// index of the last '*' seen in pattern
	int resume = 0;		// where in str that '*' currently stops matching

	while (s < str.Length) {
		if (p < pattern.Length && pattern [p] == '*') {
			// Tentatively let the star match nothing.
			star = p;
			resume = s;
			++p;
		} else if (p < pattern.Length && pattern [p] == str [s]) {
			++p;
			++s;
		} else if (star != -1) {
			// Mismatch: grow the last star by one character and retry.
			++resume;
			s = resume;
			p = star + 1;
		} else {
			return false;
		}
	}

	// The string is exhausted; only trailing stars may remain.
	while (p < pattern.Length && pattern [p] == '*')
		++p;

	return p == pattern.Length;
}
// FIXME: how do we do this operation in a culture-neutral way?
//
// Splits a string into tokens separated by spaces, treating text
// between a pair of double quotes as a single token (the quotes are
// dropped).  An opening quote without a closing one turns the rest
// of the string into one token.  Empty tokens are never returned.
static public string[] SplitQuoted (string str)
{
	char[] delimiters = new char [2] { ' ', '"' };
	ArrayList tokens = new ArrayList ();

	// Start of the part of str that has not been consumed yet.
	int cursor = 0;

	while (cursor < str.Length) {
		int hit = str.IndexOfAny (delimiters, cursor);
		if (hit == -1)
			break;

		// Text preceding the delimiter is a token of its own.
		if (hit > cursor)
			tokens.Add (str.Substring (cursor, hit - cursor));

		if (str [hit] == ' ') {
			cursor = hit + 1;
			continue;
		}

		// str [hit] is an opening quote.
		int closing = str.IndexOf ('"', hit + 1);
		if (closing == -1) {
			if (hit + 1 < str.Length)
				tokens.Add (str.Substring (hit + 1));
			cursor = str.Length;
		} else {
			int quoted_length = closing - hit - 1;
			if (quoted_length > 0)
				tokens.Add (str.Substring (hit + 1, quoted_length));
			cursor = closing + 1;
		}
	}

	if (cursor < str.Length)
		tokens.Add (str.Substring (cursor));

	return (string[]) tokens.ToArray (typeof (string));
}
// Returns true if at least one character of str is whitespace
// according to char.IsWhiteSpace.
static public bool ContainsWhiteSpace (string str)
{
	for (int i = 0; i < str.Length; ++i) {
		if (char.IsWhiteSpace (str [i]))
			return true;
	}

	return false;
}
// ASCII characters that HexEscape always replaces by a %xx escape.
static char[] CharsToQuote = { ';', '?', ':', '@', '&', '=', '$', ',', '#', '%', '"', ' ' };

// Escapes a string so that it only contains ASCII characters:
//  - characters listed in CharsToQuote become a %xx escape;
//  - other characters below 128 are copied unchanged;
//  - everything else is written as the %XX escapes of its UTF-8 bytes.
//
// A surrogate pair is encoded as one unit.  Encoding the two halves
// separately, as was done before, produces the bytes of two U+FFFD
// replacement characters and loses the original character.
static public string HexEscape (string str)
{
	StringBuilder builder = new StringBuilder ();

	for (int i = 0; i < str.Length; ++i) {
		char c = str [i];

		if (Array.IndexOf (CharsToQuote, c) != -1) {
			builder.Append (Uri.HexEscape (c));
			continue;
		}

		if (c < 128) {
			builder.Append (c);
			continue;
		}

		// Take the low surrogate along when c starts a valid pair.
		int char_count = 1;
		if (c >= 0xD800 && c <= 0xDBFF
		    && i + 1 < str.Length
		    && str [i + 1] >= 0xDC00 && str [i + 1] <= 0xDFFF)
			char_count = 2;

		byte[] utf8_bytes = Encoding.UTF8.GetBytes (str.ToCharArray (i, char_count));

		// X2 guarantees two hex digits per byte.
		foreach (byte b in utf8_bytes)
			builder.AppendFormat ("%{0:X2}", b);

		i += char_count - 1;
	}

	return builder.ToString ();
}
// Translate all %xx codes into real characters.
//
// The text between escapes is converted to UTF-8 bytes, each escape
// contributes the single byte returned by Uri.HexUnescape, and the
// collected bytes are finally decoded as UTF-8.
static public string HexUnescape (string str)
{
	MemoryStream decoded = new MemoryStream ();
	int cursor = 0;

	for (int percent = str.IndexOf ('%', cursor);
	     percent != -1;
	     percent = str.IndexOf ('%', cursor)) {

		byte[] literal = Encoding.UTF8.GetBytes (str.Substring (cursor, percent - cursor));
		decoded.Write (literal, 0, literal.Length);

		// Uri.HexUnescape advances cursor past whatever it consumed.
		cursor = percent;
		char unescaped = Uri.HexUnescape (str, ref cursor);
		decoded.WriteByte ((byte) unescaped);
	}

	byte[] tail = Encoding.UTF8.GetBytes (str.Substring (cursor, str.Length - cursor));
	decoded.Write (tail, 0, tail.Length);

	return Encoding.UTF8.GetString (decoded.ToArray ());
}
// These strings should never be exposed to the user.
//
// Counter behind GetUniqueId.  Zero means "not seeded yet"; it is
// only touched while uidLock is held.
static int uid = 0;
static object uidLock = new object ();

// Returns an identifier of the form "<USER>-<HOST>-<ticks>-<counter>",
// built from the USER and HOST environment variables, the current
// tick count and a counter that is seeded randomly on first use and
// incremented on every call.
static public string GetUniqueId ()
{
	lock (uidLock) {
		if (uid == 0)
			uid = new Random ().Next ();

		uid++;

		string user = Environment.GetEnvironmentVariable ("USER");
		string host = Environment.GetEnvironmentVariable ("HOST");
		long ticks = DateTime.Now.Ticks;

		return string.Format ("{0}-{1}-{2}-{3}", user, host, ticks, uid);
	}
}
// Entity text for the characters escaped by EscapeStringForHtml,
// indexed in this order: & < > " ' carriage-return line-feed.
static string [] replacements = new string [] {
	"&amp;",
	"&lt;",
	"&gt;",
	"&quot;",
	"&apos;",
	"&#xD;",
	"&#xA;"
};

// Builder shared between calls to EscapeStringForHtml.
static private StringBuilder cachedStringBuilder;

// Quote character used to decide which kind of quote gets escaped.
static private char QuoteChar = '\"';
// Tells whether a character code is treated as invalid for XML output.
// Tab (9), line feed (10) and carriage return (13) are accepted; other
// codes below 32, the range 0xD800-0xDFFF, the codes 0xFFFE and 0xFFFF
// and everything from 0x110000 upwards are rejected.
private static bool IsInvalid (int ch)
{
	if (ch == 9 || ch == 10 || ch == 13)
		return false;

	bool is_control = ch < 32;
	bool is_surrogate = ch >= 0xD800 && ch < 0xE000;
	bool is_noncharacter = ch >= 0xFFFE && ch < 0x10000;
	bool is_out_of_range = ch >= 0x110000;

	return is_control || is_surrogate || is_noncharacter || is_out_of_range;
}
// Returns source with markup-significant characters replaced by
// entities.
//
//  - '&', '<' and '>' are always replaced.
//  - The quote matching QuoteChar, '\r' and '\n' are replaced unless
//    skipQuotations is true; the other kind of quote is left alone.
//  - Characters rejected by IsInvalid become a hexadecimal character
//    reference ("&#x..;").
//
// When nothing needs replacing, source itself is returned.  The
// result is assembled in the shared cachedStringBuilder, which is
// emptied again before returning.
static public string EscapeStringForHtml (string source, bool skipQuotations)
{
	int length = source.Length;

	// Start of the text that has not been copied to the builder yet.
	int copied_up_to = 0;

	for (int i = 0; i < length; i++) {
		char ch = source [i];

		// Index into replacements, or -1 for a numeric reference.
		int entity;

		if (ch == '&') {
			entity = 0;
		} else if (ch == '<') {
			entity = 1;
		} else if (ch == '>') {
			entity = 2;
		} else if (ch == '\"') {
			if (skipQuotations || QuoteChar == '\'')
				continue;
			entity = 3;
		} else if (ch == '\'') {
			if (skipQuotations || QuoteChar == '\"')
				continue;
			entity = 4;
		} else if (ch == '\r') {
			if (skipQuotations)
				continue;
			entity = 5;
		} else if (ch == '\n') {
			if (skipQuotations)
				continue;
			entity = 6;
		} else if (IsInvalid (ch)) {
			entity = -1;
		} else {
			continue;
		}

		if (cachedStringBuilder == null)
			cachedStringBuilder = new StringBuilder ();

		// Flush the untouched text in front of this character.
		cachedStringBuilder.Append (source.Substring (copied_up_to, i - copied_up_to));

		if (entity >= 0) {
			cachedStringBuilder.Append (replacements [entity]);
		} else {
			string hex_format = (ch < (char) 255) ? "X02" : "X04";
			cachedStringBuilder.Append ("&#x");
			cachedStringBuilder.Append (((int) ch).ToString (hex_format, CultureInfo.InvariantCulture));
			cachedStringBuilder.Append (";");
		}

		copied_up_to = i + 1;
	}

	// Nothing was replaced: hand back the original string.
	if (copied_up_to == 0)
		return source;

	if (copied_up_to < length)
		cachedStringBuilder.Append (source.Substring (copied_up_to, length - copied_up_to));

	string escaped = cachedStringBuilder.ToString ();
	cachedStringBuilder.Length = 0;
	return escaped;
}
// Replaces every character rejected by IsInvalid with a space.
// Returns null for null input, and the original string object when
// it contains no invalid character.
static public string CleanupInvalidXmlCharacters (string str)
{
	if (str == null)
		return null;

	// Locate the first offending character, if any.
	int first_bad = -1;
	for (int i = 0; i < str.Length; ++i) {
		if (IsInvalid (str [i])) {
			first_bad = i;
			break;
		}
	}

	// Common case: nothing to fix, so avoid allocating a copy.
	if (first_bad == -1)
		return str;

	// Everything before first_bad is known to be fine; patch the rest
	// of a copy in place.
	char [] cleaned = str.ToCharArray ();
	cleaned [first_bad] = ' ';

	for (int i = first_bad + 1; i < cleaned.Length; ++i) {
		if (IsInvalid (cleaned [i]))
			cleaned [i] = ' ';
	}

	return new string (cleaned);
}
// Counts the whitespace-separated words in str.
//
//  - Words of less than min_word_length characters are not counted.
//  - If max_words is positive, counting stops as soon as that many
//    words have been found.
//  - A null string contains no words.
//
// A word is counted when it ends, whether at whitespace or at the end
// of the string, so the length check applies to every word.
// Previously the last word of a string without trailing whitespace
// was counted even when too short, and the max_words cut-off could
// count a too-short word.
static public int CountWords (string str, int max_words, int min_word_length)
{
	if (str == null)
		return 0;

	int words = 0;
	int word_start_pos = -1;	// -1 while between words

	// Run one step past the end so that the end of the string
	// terminates a pending word exactly like whitespace does.
	for (int i = 0; i <= str.Length; ++i) {
		bool at_separator = (i == str.Length) || Char.IsWhiteSpace (str [i]);

		if (! at_separator) {
			if (word_start_pos == -1)
				word_start_pos = i;
			continue;
		}

		if (word_start_pos == -1)
			continue;

		if (i - word_start_pos >= min_word_length) {
			++words;
			if (max_words > 0 && words >= max_words)
				break;
		}

		word_start_pos = -1;
	}

	return words;
}

// Counts words of any length, stopping at max_words if it is positive.
static public int CountWords (string str, int max_words)
{
	return CountWords (str, max_words, -1);
}

// Counts all whitespace-separated words in str.
static public int CountWords (string str)
{
	return CountWords (str, -1);
}
// Strip trailing slashes and make sure we only have 1 leading slash.
// A path consisting of a single "/" is returned unchanged.
static public string SanitizePath (string path)
{
	if (path.StartsWith ("//")) {
		// Find the first character after the run of leading slashes...
		int first_other = 2;
		while (first_other < path.Length && path [first_other] == '/')
			first_other++;

		// ...and keep exactly one slash in front of it.
		path = path.Substring (first_other - 1);
	}

	if (path == "/")
		return path;

	return path.TrimEnd ('/');
}
// This method will translate an email address like
// "john.doe+spamtrap@foo.com" to "john doe spamtrap foo": the address
// is lowercased, the characters @ . - _ + become spaces, and one
// trailing top-level domain from a short list is removed.
// Returns null for null input.
//
// FIXME: Maybe we should only do the username part? Ie,
// "john doe spamtrap"? That way searching for "foo" won't
// turn up *everything*
static public string SanitizeEmail (string email)
{
	if (email == null)
		return null;

	char[] separators = { '@', '.', '-', '_', '+' };
	string[] tlds = { "com", "net", "org", "edu", "gov", "mil" }; // Just the Big Six

	string words = email.ToLower ();
	foreach (char separator in separators)
		words = words.Replace (separator, ' ');

	for (int i = 0; i < tlds.Length; ++i) {
		if (! words.EndsWith (" " + tlds [i]))
			continue;

		// Every listed TLD has three letters; drop it together
		// with the space in front of it.
		return words.Substring (0, words.Length - 4);
	}

	return words;
}
// Expands environment variables in a string, e.g.
// folders=$HOME/.kde/share/...
//
// Both $NAME (letters, digits and underscores) and ${NAME} are
// recognised.  Variables that are not set expand to nothing.  In
// "$$" the first dollar sign is dropped and the second one starts
// the next expansion.  A '$' that is the last character of the
// string is dropped.
public static string ExpandEnvVariables (string path)
{
	int dollar = path.IndexOf ('$');
	if (dollar == -1)
		return path;

	// Everything in front of the first '$' is copied verbatim.
	StringBuilder expanded = new StringBuilder (path.Substring (0, dollar));

	while (dollar != -1 && dollar + 1 < path.Length) {
		// FIXME: kconfigbase.cpp contains an additional case, $(expression)/.kde/...
		// Ignoring such complicated expressions for now. Volunteers ;) ?

		// Index of the first character after the reference.
		int cursor = dollar + 1;

		if (path [cursor] != '$') {
			string name;

			if (path [cursor] == '{') {
				// ${NAME}: the name runs up to the closing brace,
				// or to the end of the string if there is none.
				int closing = path.IndexOf ('}', cursor);
				if (closing == -1)
					closing = path.Length;
				cursor = closing + 1;
				name = path.Substring (dollar + 2, cursor - dollar - 3);
			} else {
				// $NAME: the name is the longest run of letters,
				// digits and underscores.
				while (cursor < path.Length
				       && (Char.IsNumber (path [cursor])
					   || Char.IsLetter (path [cursor])
					   || path [cursor] == '_'))
					cursor++;
				name = path.Substring (dollar + 1, cursor - dollar - 1);
			}

			if (name != String.Empty) {
				string value = Environment.GetEnvironmentVariable (name);
				if (value != null)
					expanded.Append (value);
				// else, no environment variable with that name exists. ignore
			}
		}
		// else, ignore the first '$', second one will be expanded

		if (cursor >= path.Length)
			break;

		// Copy the literal text up to the next '$' (or to the end).
		dollar = path.IndexOf ('$', cursor);
		int literal_end = (dollar == -1) ? path.Length : dollar;
		expanded.Append (path.Substring (cursor, literal_end - cursor));
	}

	return expanded.ToString ();
}
// Removes every "<...>" span from line, using builder as scratch
// space (its previous content is discarded when it is used).
// If line contains no '<' it is returned as is and builder is left
// untouched.  A '<' without a later '>' is kept, together with the
// rest of the line.
public static string StripTags (string line, StringBuilder builder)
{
	int open = line.IndexOf ('<');
	if (open == -1)
		return line;

	builder.Length = 0;

	// Start of the text that has not been copied yet.
	int cursor = 0;

	while (cursor < line.Length) {
		int close = (open == -1) ? -1 : line.IndexOf ('>', open);

		// No further complete tag: keep the remainder verbatim.
		if (close == -1) {
			builder.Append (line, cursor, line.Length - cursor);
			break;
		}

		// Keep the text in front of the tag, then skip the tag.
		builder.Append (line, cursor, open - cursor);
		cursor = close + 1;

		open = (cursor < line.Length) ? line.IndexOf ('<', cursor) : -1;
	}

	return builder.ToString ();
}

// Convenience overload that allocates its own scratch builder.
public static string StripTags (string line)
{
	return StripTags (line, new StringBuilder ());
}
// Replaces the entities &lt; &gt; &quot; &nbsp; and &amp; by the
// characters they stand for.  Returns null for null input.
//
// Strings are immutable, so the result of every Replace call has to
// be assigned back; the calls used to be made for their (nonexistent)
// side effect, which left the text unchanged.
public static string ConvertSpecialEntities (string line)
{
	if (line == null)
		return null;

	line = line.Replace ("&lt;", "<");
	line = line.Replace ("&gt;", ">");
	line = line.Replace ("&quot;", "\"");

	// Handle the well-formed entity first so that no stray ';' is
	// left behind, then the semicolon-less spelling.
	line = line.Replace ("&nbsp;", " ");
	line = line.Replace ("&nbsp", " ");

	// "&amp;" goes last so that text such as "&amp;lt;" decodes to
	// the literal "&lt;" instead of being decoded twice.
	line = line.Replace ("&amp;", "&");

	return line;
}
// TextReader wrapper whose ReadLine returns the lines of another
// reader after passing them through StringFu.StripTags and
// StringFu.ConvertSpecialEntities.  Only ReadLine and Close are
// overridden.
public class HtmlRemovingReader : TextReader {

	// Reader the raw lines are pulled from.
	private TextReader source;

	// Scratch buffer handed to StringFu.StripTags on every line.
	private StringBuilder scratch;

	public HtmlRemovingReader (TextReader reader)
	{
		source = reader;
		scratch = new StringBuilder ();
	}

	// Returns the next line with tags stripped, or null once the
	// wrapped reader has no more lines.
	public override string ReadLine ()
	{
		string raw = source.ReadLine ();
		if (raw == null)
			return null;

		scratch.Length = 0;
		string without_tags = StringFu.StripTags (raw, scratch);

		return StringFu.ConvertSpecialEntities (without_tags);
	}

	// Closes the wrapped reader.
	public override void Close ()
	{
		source.Close ();
	}
}