Add --enable-deletion option to buildindex. If used, buildindex will remove deleted...
[beagle.git] / Util / StringFu.cs
blob79e36beb858ed55ce704bbe2273ef1b0e7eb7f10
1 //
2 // StringFu.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.Globalization;
30 using System.IO;
31 using System.Text;
32 using System.Xml;
34 using Mono.Unix;
36 namespace Beagle.Util {
38 public class StringFu {
// Every member of StringFu is static; the private constructor
// keeps the class from being instantiated.
private StringFu ()
{
}

// The literal "_unindexed:".  It is not referenced anywhere else
// in this file.
public const string UnindexedNamespace = "_unindexed:";
// Layout of the timestamp strings written by DateTimeToString and
// read back by StringToDateTime.
private const String timeFormat = "yyyyMMddHHmmss";

// Formats dt as a 14-digit "yyyyMMddHHmmss" string.  The invariant
// culture is used so that the digits do not depend on the calendar
// of the user's locale.
static public string DateTimeToString (DateTime dt)
{
	return dt.ToString (timeFormat, CultureInfo.InvariantCulture);
}

// Formats dt as a 6-digit "yyyyMM" string (invariant culture).
static public string DateTimeToYearMonthString (DateTime dt)
{
	return dt.ToString ("yyyyMM", CultureInfo.InvariantCulture);
}

// Formats dt as a 2-digit "dd" day-of-month string (invariant culture).
static public string DateTimeToDayString (DateTime dt)
{
	return dt.ToString ("dd", CultureInfo.InvariantCulture);
}

// Parses a string produced by DateTimeToString.  A null or empty
// string yields the default DateTime value.  DateTime.ParseExact
// throws a FormatException if str does not match timeFormat.
static public DateTime StringToDateTime (string str)
{
	if (str == null || str == "")
		return new DateTime ();

	// Must use the same culture as DateTimeToString so that
	// values round-trip regardless of the current locale.
	return DateTime.ParseExact (str, timeFormat, CultureInfo.InvariantCulture);
}
// Builds a short, translated "<date>, <time>" description of dt.
// The date part is "Today" or "Yesterday" when dt falls on that
// calendar day, a month/day when dt is in the current year, and a
// month/day/year otherwise.
static public string DateTimeToFuzzy (DateTime dt)
{
	DateTime today = DateTime.Today;

	// Compare calendar days rather than an elapsed TimeSpan, so
	// that future dates are not reported as "Today" and midnight
	// yesterday is still reported as "Yesterday".
	DateTime day = dt.Date;

	string date;

	if (day == today)
		date = Catalog.GetString ("Today");
	else if (day == today.AddDays (-1))
		date = Catalog.GetString ("Yesterday");
	else if (today.Year == dt.Year)
		/* Translators: Example output: Aug 9 */
		date = dt.ToString (Catalog.GetString ("MMM d"));
	else
		/* Translators: Example output: Aug 9, 2000 */
		date = dt.ToString (Catalog.GetString ("MMM d, yyyy"));

	/* Translators: Example output: 11:05 AM (note h = 12-hour time) */
	string time = dt.ToString (Catalog.GetString ("h:mm tt"));

	/* Translators: {0} is a date (e.g. 'Today' or 'Apr 23'), {1} is the time */
	return String.Format (Catalog.GetString ("{0}, {1}"), date, time);
}
// Builds a translated description of date relative to now:
// "Today, ..." / "Yesterday, ..." / "N days ago, ..." for the last
// five days, a month/day/time for other dates in the current year,
// and a month/day/year/time otherwise.
public static string DateTimeToPrettyString (DateTime date)
{
	DateTime now = DateTime.Now;
	string short_time = date.ToShortTimeString ();

	// Whole calendar days between the two dates.  Unlike a
	// DayOfYear subtraction this stays correct across a year
	// boundary (e.g. Dec 31 seen on Jan 1).  Negative for
	// dates in the future.
	int days_ago = (now.Date - date.Date).Days;

	if (days_ago == 0) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Today, {0}"), short_time);
	}

	if (days_ago == 1) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Yesterday, {0}"), short_time);
	}

	if (days_ago > 1 && days_ago < 6) {
		/* To translators: {0} is the number of days that have passed, {1} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("{0} days ago, {1}"),
				      days_ago,
				      short_time);
	}

	if (date.Year == now.Year) {
		/* Translators: Example output: January 3, 3:45 PM */
		return date.ToString (Catalog.GetString ("MMMM d, h:mm tt"));
	}

	/* Translators: Example output: March 23 2001, 10:04 AM */
	return date.ToString (Catalog.GetString ("MMMM d yyyy, h:mm tt"));
}
// Describes the time between start_time and end_time as a
// translated "H hours, M minutes" string.  Parts that are zero are
// left out, so the result is empty for durations under a minute
// (and for negative durations).
public static string DurationToPrettyString (DateTime end_time, DateTime start_time)
{
	TimeSpan span = end_time - start_time;

	// TimeSpan.Hours is only the 0-23 hour component; use the
	// total so that durations of a day or more are not truncated.
	int hours = (int) span.TotalHours;
	int minutes = span.Minutes;

	string span_str = "";

	if (hours > 0) {
		span_str = String.Format (Catalog.GetPluralString ("{0} hour", "{0} hours", hours), hours);

		if (minutes > 0)
			span_str += ", ";
	}

	if (minutes > 0) {
		span_str += String.Format (Catalog.GetPluralString ("{0} minute", "{0} minutes", minutes), minutes);
	}

	return span_str;
}
// Renders a byte count as a translated "N bytes", "N.N KB" or
// "N.N MB" string.  Negative lengths produce "*BadLength*".
static public string FileLengthToString (long len)
{
	const long kilobyte = 1024;
	const long megabyte = 1024*1024;

	if (len < 0)
		return "*BadLength*";

	if (len >= megabyte) {
		/* Translators: {0} is a file size in megabytes */
		return String.Format (Catalog.GetString ("{0:0.0} MB"), (double) len / megabyte);
	}

	if (len >= kilobyte) {
		/* Translators: {0} is a file size in kilobytes */
		return String.Format (Catalog.GetString ("{0:0.0} KB"), (double) len / kilobyte);
	}

	/* Translators: {0} is a file size in bytes */
	return String.Format (Catalog.GetString ("{0} bytes"), len);
}
// Splits a string into space-separated pieces:
//   (1) every run of non-alphanumeric characters becomes a single
//       space (and nothing is emitted for a leading run);
//   (2) a space is inserted at lowercase-to-uppercase transitions
//       ("FooBar" -> "Foo Bar") and between letters and digits
//       ("cvs2svn" -> "cvs 2 svn").
// An uppercase letter followed by a lowercase one is not a
// boundary, so "Foo" stays in one piece.
static public string FuzzyDivide (string line)
{
	// A few extra characters of room for the inserted spaces.
	StringBuilder divided = new StringBuilder (line.Length + 4);

	// +1 = uppercase, -1 = lowercase, 0 = anything else
	int previous_kind = 0;

	// Starts out true so the output never begins with a space.
	bool at_space = true;

	foreach (char c in line) {
		bool alphanumeric = Char.IsLetterOrDigit (c);
		int kind = 0;
		bool boundary;

		if (alphanumeric) {
			if (Char.IsUpper (c))
				kind = +1;
			else if (Char.IsLower (c))
				kind = -1;

			bool upper_then_lower = (previous_kind == +1 && kind == -1);
			boundary = (kind != previous_kind) && ! upper_then_lower;
		} else {
			boundary = true;
		}

		if (boundary && ! at_space) {
			divided.Append (' ');
			at_space = true;
		}

		if (alphanumeric) {
			divided.Append (c);
			at_space = false;
		}

		previous_kind = kind;
	}

	return divided.ToString ();
}
// Runs FuzzyDivide on the part of url that follows the first
// "://".  If url contains no "://" the whole string is divided.
public static string UrlFuzzyDivide (string url)
{
	int protocol_index = url.IndexOf ("://");

	// IndexOf returns -1 when there is no scheme separator;
	// adding 3 to that would silently drop the first two
	// characters of the string (or throw on very short input).
	if (protocol_index == -1)
		return FuzzyDivide (url);

	return FuzzyDivide (url.Substring (protocol_index + 3));
}
// Match strings against patterns that are allowed to contain
// glob-style * wildcards.  A '*' matches any run of characters,
// including an empty one; every other character must match
// exactly.  Returns false if either argument is null.
//
// This is an iterative matcher with single-star backtracking: on a
// mismatch we go back to the most recent '*' and let it swallow one
// more character of str.
static public bool GlobMatch (string pattern, string str)
{
	if (pattern == null || str == null)
		return false;

	int p = 0;           // current position in pattern
	int s = 0;           // current position in str
	int star = -1;       // position of the last '*' seen in pattern
	int resume = 0;      // position in str where that '*' started matching

	while (s < str.Length) {
		if (p < pattern.Length && pattern [p] == '*') {
			// Tentatively let the star match nothing.
			star = p;
			resume = s;
			++p;
		} else if (p < pattern.Length && pattern [p] == str [s]) {
			++p;
			++s;
		} else if (star != -1) {
			// Backtrack: the star absorbs one more character.
			++resume;
			s = resume;
			p = star + 1;
		} else {
			return false;
		}
	}

	// str is used up; whatever remains of the pattern must be
	// nothing but stars.
	while (p < pattern.Length && pattern [p] == '*')
		++p;

	return p == pattern.Length;
}
// Splits str on spaces, treating text between a pair of double
// quotes as a single piece.  Empty pieces are dropped.  An opening
// quote with no closing quote takes the rest of the string.
//
// FIXME: how do we do this operation in a culture-neutral way?
static public string[] SplitQuoted (string str)
{
	char[] separators = new char [2] { ' ', '"' };
	ArrayList pieces = new ArrayList ();

	// Start of the text that has not been consumed yet.
	int pos = 0;
	int hit;

	while ((hit = str.IndexOfAny (separators, pos)) != -1) {
		// Unquoted text in front of the separator.
		if (hit > pos)
			pieces.Add (str.Substring (pos, hit - pos));

		if (str [hit] == ' ') {
			pos = hit + 1;
			continue;
		}

		// Otherwise it is an opening quote.
		int closing = str.IndexOf ('"', hit + 1);

		if (closing == -1) {
			if (hit + 1 < str.Length)
				pieces.Add (str.Substring (hit + 1));
			pos = str.Length;
		} else {
			if (closing > hit + 1)
				pieces.Add (str.Substring (hit + 1, closing - hit - 1));
			pos = closing + 1;
		}
	}

	if (pos < str.Length)
		pieces.Add (str.Substring (pos));

	return (string []) pieces.ToArray (typeof (string));
}
// Returns true if any character of str is whitespace according to
// Char.IsWhiteSpace.
static public bool ContainsWhiteSpace (string str)
{
	for (int i = 0; i < str.Length; ++i) {
		if (Char.IsWhiteSpace (str [i]))
			return true;
	}

	return false;
}
// The characters that HexEscape replaces by a %xx sequence.
static char[] CharsToQuote = { ';', '?', ':', '@', '&', '=', '$', ',', '#', '%', '"', ' ' };

// Returns a copy of str in which every character listed in
// CharsToQuote is replaced by its Uri.HexEscape form.  All other
// characters are copied unchanged.
static public string HexEscape (string str)
{
	StringBuilder escaped = new StringBuilder ();

	foreach (char c in str) {
		if (Array.IndexOf (CharsToQuote, c) != -1)
			escaped.Append (Uri.HexEscape (c));
		else
			escaped.Append (c);
	}

	return escaped.ToString ();
}
// Translate all %xx codes into real characters.  The string is
// decoded in a single pass, so the output of one escape is never
// decoded again ("%2541" becomes "%41").  A '%' that is not
// followed by two hex digits is copied through unchanged.
static public string HexUnescape (string str)
{
	int percent = str.IndexOf ('%');
	if (percent == -1)
		return str;

	StringBuilder decoded = new StringBuilder (str.Length);
	int pos = 0;

	while (percent != -1) {
		// Literal text in front of the '%'.
		decoded.Append (str, pos, percent - pos);

		// Uri.HexUnescape advances pos by 3 for a valid escape
		// and by 1 (returning the '%' itself) otherwise, so we
		// must not assume that three characters were consumed.
		pos = percent;
		decoded.Append (Uri.HexUnescape (str, ref pos));

		percent = str.IndexOf ('%', pos);
	}

	decoded.Append (str, pos, str.Length - pos);

	return decoded.ToString ();
}
// Turns path into a "file://" URI string: the path is made
// absolute with Path.GetFullPath, passed through HexEscape, and
// prefixed with the file scheme and scheme delimiter.
static public string PathToQuotedFileUri (string path)
{
	string full_path = Path.GetFullPath (path);
	string escaped_path = HexEscape (full_path);

	return String.Concat (Uri.UriSchemeFile, Uri.SchemeDelimiter, escaped_path);
}
// These strings should never be exposed to the user.

// Counter behind GetUniqueId.  Zero means "not seeded yet".
static int uid = 0;

// Guards every access to uid.
static object uidLock = new object ();

// Returns "<USER>-<HOST>-<ticks>-<counter>", where USER and HOST
// are read from the environment, ticks is DateTime.Now.Ticks and
// counter is a process-wide integer that starts at a random value
// and is incremented on every call.
static public string GetUniqueId ()
{
	int id;

	lock (uidLock) {
		if (uid == 0)
			uid = new Random ().Next ();

		id = ++uid;
	}

	return string.Format ("{0}-{1}-{2}-{3}",
			      Environment.GetEnvironmentVariable ("USER"),
			      Environment.GetEnvironmentVariable ("HOST"),
			      DateTime.Now.Ticks,
			      id);
}
// Entity text used by EscapeStringForHtml, indexed by the "pos"
// value it picks: 0 '&', 1 '<', 2 '>', 3 '"', 4 '\'', 5 CR, 6 LF.
static string [] replacements = new string [] {
	"&amp;", "&lt;", "&gt;", "&quot;", "&apos;",
	"&#xD;", "&#xA;"};

// The quote character that is escaped; the other kind of quote is
// left alone by EscapeStringForHtml.
static private char QuoteChar = '\"';

// Returns true for code points that this class treats as invalid:
// control characters below 32 other than tab, LF and CR, the
// range 0xD800-0xDFFF, 0xFFFE-0xFFFF, and anything from 0x110000 up.
private static bool IsInvalid (int ch)
{
	switch (ch) {
	case 9:
	case 10:
	case 13:
		return false;
	}

	if (ch < 32)
		return true;
	if (ch < 0xD800)
		return false;
	if (ch < 0xE000)
		return true;
	if (ch < 0xFFFE)
		return false;
	if (ch < 0x10000)
		return true;
	if (ch < 0x110000)
		return false;
	else
		return true;
}

// Escapes source for inclusion in HTML.  '&', '<' and '>' are
// always replaced by entities, and characters for which IsInvalid
// is true are replaced by a "&#x..;" numeric reference.  Unless
// skipQuotations is true, the QuoteChar quote, CR and LF are
// escaped as well.  If nothing needs escaping, source itself is
// returned.
//
// The output is assembled in a builder local to the call, so
// concurrent callers cannot interleave their output and a failed
// call leaves nothing behind for the next one.
static public string EscapeStringForHtml (string source, bool skipQuotations)
{
	// Created lazily: most strings need no escaping at all.
	StringBuilder builder = null;

	// Index of the first character not yet copied to builder.
	int start = 0;
	int count = source.Length;

	for (int i = 0; i < count; i++) {
		char c = source [i];

		// Index into replacements, or -1 for an invalid
		// character that gets a numeric reference.
		int pos;

		switch (c) {
		case '&': pos = 0; break;
		case '<': pos = 1; break;
		case '>': pos = 2; break;
		case '\"':
			if (skipQuotations) continue;
			if (QuoteChar == '\'') continue;
			pos = 3; break;
		case '\'':
			if (skipQuotations) continue;
			if (QuoteChar == '\"') continue;
			pos = 4; break;
		case '\r':
			if (skipQuotations) continue;
			pos = 5; break;
		case '\n':
			if (skipQuotations) continue;
			pos = 6; break;
		default:
			if (! IsInvalid (c))
				continue;
			pos = -1;
			break;
		}

		if (builder == null)
			builder = new StringBuilder (count + 16);

		// Copy the untouched run in front of this character.
		builder.Append (source, start, i - start);

		if (pos < 0) {
			builder.Append ("&#x");
			if (c < (char) 255)
				builder.Append (((int) c).ToString ("X02", CultureInfo.InvariantCulture));
			else
				builder.Append (((int) c).ToString ("X04", CultureInfo.InvariantCulture));
			builder.Append (";");
		} else {
			builder.Append (replacements [pos]);
		}

		start = i + 1;
	}

	// Nothing was escaped.
	if (builder == null)
		return source;

	builder.Append (source, start, count - start);
	return builder.ToString ();
}
// Returns str with every character for which IsInvalid is true
// replaced by a space.  A null argument yields null, and a string
// without invalid characters is returned as-is, without copying.
static public string CleanupInvalidXmlCharacters (string str)
{
	if (str == null)
		return null;

	// Locate the first offending character, if there is one.
	int first_bad = 0;
	while (first_bad < str.Length && ! IsInvalid (str [first_bad]))
		++first_bad;

	if (first_bad >= str.Length)
		return str;

	// Work on a copy, blanking invalid characters from the
	// first bad position onwards.
	char [] cleaned = str.ToCharArray ();

	for (int k = first_bad; k < cleaned.Length; ++k) {
		if (IsInvalid (cleaned [k]))
			cleaned [k] = ' ';
	}

	return new string (cleaned);
}
// Counts the whitespace-separated words in str.  A null string
// contains zero words.
//
// Words of less than min_word_length characters are not counted.
//
// If max_words is positive, counting stops as soon as the
// max_words-th word begins; that word is counted without its
// length being checked.
static public int CountWords (string str, int max_words, int min_word_length)
{
	if (str == null)
		return 0;

	bool last_was_white = true;
	int words = 0;
	int word_start_pos = -1;

	for (int i = 0; i < str.Length; ++i) {
		if (Char.IsWhiteSpace (str [i])) {
			// if just seen word is too short, ignore it
			if (! last_was_white && (i - word_start_pos < min_word_length))
				--words;
			last_was_white = true;
		} else {
			if (last_was_white) {
				++words;
				word_start_pos = i;
				if (max_words > 0 && words >= max_words)
					break;
			}
			last_was_white = false;
		}
	}

	// A word that runs to the end of the string has no trailing
	// whitespace to trigger the length check above, so apply
	// it here.  (After the max_words break last_was_white is
	// still true, so this does not fire in that case.)
	if (! last_was_white && (str.Length - word_start_pos < min_word_length))
		--words;

	return words;
}

// Counts words of any length, stopping at max_words if it is positive.
static public int CountWords (string str, int max_words)
{
	return CountWords (str, max_words, -1);
}

// Counts all words of any length.
static public int CountWords (string str)
{
	return CountWords (str, -1);
}
// Strip trailing slashes and make sure we only have 1 leading slash.
// The path "/" itself is left alone.
static public string SanitizePath (string path)
{
	if (path.StartsWith ("//")) {
		// Skip over the whole run of leading slashes...
		int first_non_slash = 2;
		while (first_non_slash < path.Length && path [first_non_slash] == '/')
			++first_non_slash;

		// ...and keep just the last one of them.
		path = path.Substring (first_non_slash - 1);
	}

	if (path == "/")
		return path;

	return path.TrimEnd ('/');
}
// This method will translate an email address like
// "john.doe+spamtrap@foo.com" to "john doe spamtrap foo":
// the address is lower-cased, the characters @ . - _ + become
// spaces, and one trailing top-level domain from a short list is
// removed.  A null address yields null.
//
// FIXME: Maybe we should only do the username part?  Ie,
// "john doe spamtrap"?  That way searching for "foo" won't
// turn up *everything*
static public string SanitizeEmail (string email)
{
	char[] replace_array = { '@', '.', '-', '_', '+' };
	string[] tlds = { "com", "net", "org", "edu", "gov", "mil" }; // Just the Big Six

	if (email == null)
		return null;

	// Lower-case with the invariant culture: with the current
	// culture, a Turkish locale would turn 'I' into a dotless
	// 'ı' and the result would depend on the user's locale.
	email = email.ToLower (CultureInfo.InvariantCulture);

	string[] tmp = email.Split (replace_array);
	email = String.Join (" ", tmp);

	foreach (string tld in tlds) {
		if (email.EndsWith (" " + tld)) {
			// Every listed tld is three letters, plus the space.
			email = email.Substring (0, email.Length - 4);
			break;
		}
	}

	return email;
}
// Expands environment variables in a string, e.g.
//   folders=$HOME/.kde/share/...
// Both $NAME and ${NAME} are recognised; NAME in the first form
// consists of letters, numbers and underscores.  A reference to a
// variable that is not set expands to nothing.  In "$$" the first
// '$' is dropped and the second one starts a new reference.
public static string ExpandEnvVariables (string path)
{
	int dollar = path.IndexOf ('$');
	if (dollar == -1)
		return path;

	// Everything in front of the first '$' is copied verbatim.
	StringBuilder expanded = new StringBuilder (path.Substring (0, dollar));

	while (dollar != -1 && dollar + 1 < path.Length) {
		// FIXME: kconfigbase.cpp contains an additional case, $(expression)/.kde/...
		// Ignoring such complicated expressions for now. Volunteers ;) ?

		// Index of the first character after the reference.
		int end = dollar + 1;

		if (path [end] != '$') {
			string name;

			if (path [end] == '{') {
				// ${NAME}: runs up to and including the
				// closing brace, or past the end of the
				// string if the brace is missing.
				int brace = path.IndexOf ('}', end);
				end = (brace == -1 ? path.Length : brace) + 1;
				name = path.Substring (dollar + 2, end - dollar - 3);
			} else {
				// $NAME
				while (end < path.Length
				       && (Char.IsNumber (path [end])
					   || Char.IsLetter (path [end])
					   || path [end] == '_'))
					end ++;
				name = path.Substring (dollar + 1, end - dollar - 1);
			}

			if (name != String.Empty) {
				string env_value = Environment.GetEnvironmentVariable (name);

				// If no such variable exists, append nothing.
				if (env_value != null)
					expanded.Append (env_value);
			}
		}
		// else: "$$" -- skip the first '$', the second one is
		// picked up by the search below.

		if (end >= path.Length)
			break;

		// Copy the literal text up to the next '$' (or to the
		// end of the string if there is none).
		dollar = path.IndexOf ('$', end);
		int literal_end = (dollar == -1) ? path.Length : dollar;
		expanded.Append (path, end, literal_end - end);
	}

	return expanded.ToString ();
}
// Removes every "<...>" span from line, using builder as scratch
// space (its previous contents are discarded).  If line contains
// no '<' at all it is returned as-is and builder is not touched.
// A '<' with no '>' after it is kept, together with the rest of
// the line.
public static string StripTags (string line, StringBuilder builder)
{
	int open = line.IndexOf ('<');
	if (open == -1)
		return line;

	builder.Length = 0;

	// Start of the text that has not been copied yet.
	int cursor = 0;

	while (cursor < line.Length) {
		int close = (open == -1) ? -1 : line.IndexOf ('>', open);

		// No further complete tag: keep the remainder verbatim.
		if (close == -1) {
			builder.Append (line, cursor, line.Length - cursor);
			break;
		}

		// Keep the text in front of the tag and skip the tag.
		builder.Append (line, cursor, open - cursor);
		cursor = close + 1;

		open = line.IndexOf ('<', cursor);
	}

	return builder.ToString ();
}

// Same as above, with a scratch StringBuilder created for the call.
public static string StripTags (string line)
{
	return StripTags (line, new StringBuilder ());
}
// Replaces the entities &lt; &gt; &quot; &nbsp; and &amp; in line
// by the characters they stand for and returns the result.
public static string ConvertSpecialEntities (string line)
{
	// Strings are immutable: Replace returns the new string, so
	// the result has to be assigned back.
	line = line.Replace ("&lt;", "<");
	line = line.Replace ("&gt;", ">");
	line = line.Replace ("&quot;", "\"");

	// Handle the well-formed entity first, so that no stray
	// ';' is left behind, then the form without a semicolon.
	line = line.Replace ("&nbsp;", " ");
	line = line.Replace ("&nbsp", " ");

	// &amp; must come last; otherwise "&amp;lt;" would first
	// become "&lt;" and then be decoded a second time.
	line = line.Replace ("&amp;", "&");

	return line;
}
// A TextReader that wraps another reader and, line by line, passes
// what it reads through StringFu.StripTags and
// StringFu.ConvertSpecialEntities.  Only ReadLine is overridden.
public class HtmlRemovingReader : TextReader {

	// The reader the lines are pulled from.
	private TextReader reader;

	// Scratch buffer handed to StringFu.StripTags on every line.
	private StringBuilder sb;

	public HtmlRemovingReader (TextReader reader)
	{
		this.reader = reader;
		this.sb = new StringBuilder ();
	}

	// Returns the next line of the wrapped reader after tag
	// stripping and entity conversion, or null at end of input.
	public override string ReadLine ()
	{
		string line = reader.ReadLine ();

		if (line == null)
			return null;

		sb.Length = 0;
		line = StringFu.StripTags (line, sb);
		line = StringFu.ConvertSpecialEntities (line);

		return line;
	}

	// Closes the wrapped reader.  This is done in Dispose (bool)
	// rather than in a Close override so that the wrapped reader
	// is released both by Close () -- which TextReader implements
	// by calling Dispose (true) -- and by Dispose () / "using".
	protected override void Dispose (bool disposing)
	{
		if (disposing)
			reader.Close ();

		base.Dispose (disposing);
	}
}