4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Globalization
;
36 namespace Beagle
.Util
{
38 public class StringFu
{
40 private StringFu () { }
// class is static
42 public const string UnindexedNamespace
= "_unindexed:";
// Fixed-width, lexically sortable timestamp layout used for the
// machine-readable date strings produced and consumed below.
private const String timeFormat = "yyyyMMddHHmmss";

// Serializes a DateTime as "yyyyMMddHHmmss".
// The invariant culture is used so that the calendar and digits do not
// depend on the user's locale; the result must be parseable by
// StringToDateTime regardless of where it was written.
static public string DateTimeToString (DateTime dt)
{
	return dt.ToString (timeFormat, CultureInfo.InvariantCulture);
}

// Returns the year and month of dt as "yyyyMM" (invariant culture).
static public string DateTimeToYearMonthString (DateTime dt)
{
	return dt.ToString ("yyyyMM", CultureInfo.InvariantCulture);
}

// Returns the two-digit day of month of dt as "dd" (invariant culture).
static public string DateTimeToDayString (DateTime dt)
{
	return dt.ToString ("dd", CultureInfo.InvariantCulture);
}

// Parses a string produced by DateTimeToString.
// A null or empty string yields the default DateTime value.
// Throws FormatException (from DateTime.ParseExact) if str does not
// match the "yyyyMMddHHmmss" layout.
static public DateTime StringToDateTime (string str)
{
	if (str == null || str == "")
		return new DateTime ();

	// Must use the same culture as DateTimeToString so values round-trip.
	return DateTime.ParseExact (str, timeFormat, CultureInfo.InvariantCulture);
}
// Formats dt as a short, human-friendly "<date>, <time>" string.
// The date part is "Today" or "Yesterday" when dt falls on the
// corresponding calendar day, "MMM d" for other days of the current
// year, and "MMM d, yyyy" otherwise.  All patterns go through
// Catalog.GetString so translators can localize them.
static public string DateTimeToFuzzy (DateTime dt)
{
	DateTime today = DateTime.Today;

	// Compare calendar days rather than elapsed time: an elapsed-time
	// test labels every future date "Today" and misses timestamps that
	// fall exactly on yesterday's midnight.
	DateTime day = dt.Date;

	string date;

	if (day == today)
		date = Catalog.GetString ("Today");
	else if (day == today.AddDays (-1))
		date = Catalog.GetString ("Yesterday");
	else if (today.Year == dt.Year)
		/* Translators: Example output: Aug 9 */
		date = dt.ToString (Catalog.GetString ("MMM d"));
	else
		/* Translators: Example output: Aug 9, 2000 */
		date = dt.ToString (Catalog.GetString ("MMM d, yyyy"));

	/* Translators: Example output: 11:05 AM (note h = 12-hour time) */
	string time = dt.ToString (Catalog.GetString ("h:mm tt"));

	/* Translators: {0} is a date (e.g. 'Today' or 'Apr 23'), {1} is the time */
	return String.Format (Catalog.GetString ("{0}, {1}"), date, time);
}
// Formats date relative to the current local time:
//   same day            -> "Today, <short time>"
//   previous day        -> "Yesterday, <short time>"
//   2 to 5 days earlier -> "<n> days ago, <short time>"
//   same year           -> "MMMM d, h:mm tt"
//   anything else       -> "MMMM d yyyy, h:mm tt"
// All patterns go through Catalog.GetString for localization.
public static string DateTimeToPrettyString (DateTime date)
{
	DateTime now = DateTime.Now;
	string short_time = date.ToShortTimeString ();

	// Whole calendar days between the two dates.  Subtracting DayOfYear
	// values breaks across a year boundary (Dec 31 viewed on Jan 1 would
	// never be "Yesterday"), so work with real dates instead.
	int days_ago = (now.Date - date.Date).Days;

	if (days_ago == 0) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Today, {0}"), short_time);
	}

	if (days_ago == 1) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Yesterday, {0}"), short_time);
	}

	if (days_ago > 1 && days_ago < 6) {
		/* To translators: {0} is the number of days that have passed, {1} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("{0} days ago, {1}"),
				      days_ago,
				      short_time);
	}

	if (date.Year == now.Year) {
		/* Translators: Example output: January 3, 3:45 PM */
		return date.ToString (Catalog.GetString ("MMMM d, h:mm tt"));
	}

	/* Translators: Example output: March 23 2001, 10:04 AM */
	return date.ToString (Catalog.GetString ("MMMM d yyyy, h:mm tt"));
}
130 public static string DurationToPrettyString (DateTime end_time
, DateTime start_time
)
132 TimeSpan span
= end_time
- start_time
;
134 string span_str
= "";
136 if (span
.Hours
> 0) {
137 span_str
= String
.Format (Catalog
.GetPluralString ("{0} hour", "{0} hours", span
.Hours
), span
.Hours
);
139 if (span
.Minutes
> 0)
143 if (span
.Minutes
> 0) {
144 span_str
+= String
.Format (Catalog
.GetPluralString ("{0} minute", "{0} minutes", span
.Minutes
), span
.Minutes
);
151 static public string FileLengthToString (long len
)
153 const long oneMb
= 1024*1024;
156 return "*BadLength*";
159 /* Translators: {0} is a file size in bytes */
160 return String
.Format (Catalog
.GetString ("{0} bytes"), len
);
163 /* Translators: {0} is a file size in kilobytes */
164 return String
.Format (Catalog
.GetString ("{0:0.0} KB"), len
/(double)1024);
166 /* Translators: {0} is a file size in megabytes */
167 return String
.Format (Catalog
.GetString ("{0:0.0} MB"), len
/(double)oneMb
);
171 // (1) Replace non-alphanumeric characters with spaces
172 // (2) Inject whitespace between lowercase-to-uppercase
173 // transitions (so "FooBar" becomes "Foo Bar")
174 // and transitions between letters and numbers
175 // (so "cvs2svn" becomes "cvs 2 svn")
176 static public string FuzzyDivide (string line
)
178 // Allocate a space slightly bigger than the
180 StringBuilder builder
;
181 builder
= new StringBuilder (line
.Length
+ 4);
184 bool last_was_space
= true; // don't start w/ a space
185 for (int i
= 0; i
< line
.Length
; ++i
) {
188 if (Char
.IsLetterOrDigit (c
)) {
189 if (Char
.IsUpper (c
))
191 else if (Char
.IsLower (c
))
193 if (this_case
!= prev_case
194 && !(this_case
== -1 && prev_case
== +1)) {
195 if (! last_was_space
) {
196 builder
.Append (' ');
197 last_was_space
= true;
201 if (c
!= ' ' || !last_was_space
) {
203 last_was_space
= (c
== ' ');
206 prev_case
= this_case
;
208 if (! last_was_space
) {
209 builder
.Append (' ');
210 last_was_space
= true;
216 return builder
.ToString ();
// Applies FuzzyDivide to a URL after dropping its "scheme://" prefix.
// If the string contains no "://" it is divided as-is; previously the
// -1 returned by IndexOf caused the first two characters to be cut off
// (or an ArgumentOutOfRangeException for strings shorter than that).
public static string UrlFuzzyDivide (string url)
{
	int protocol_index = url.IndexOf ("://", StringComparison.Ordinal);
	if (protocol_index == -1)
		return FuzzyDivide (url);

	return FuzzyDivide (url.Substring (protocol_index + 3));
}
// Match strings against patterns that are allowed to contain
// glob-style * wildcards.  '*' matches any run of characters,
// including the empty run; every other character matches itself
// (ordinal comparison).  Returns false if either argument is null.
//
// Implemented as an iterative scan with single-star backtracking, so
// it runs in O(pattern.Length * str.Length) worst case and handles a
// trailing '*' matching nothing (e.g. "abc*" matches "abc").
static public bool GlobMatch (string pattern, string str)
{
	if (pattern == null || str == null)
		return false;

	int p = 0;          // current position in pattern
	int s = 0;          // current position in str
	int star = -1;      // position of the most recent '*' seen in pattern
	int resume = 0;     // position in str the most recent '*' has consumed up to

	while (s < str.Length) {
		if (p < pattern.Length && pattern [p] == '*') {
			// Tentatively let the star match nothing and remember
			// where to come back to if that does not work out.
			star = p++;
			resume = s;
		} else if (p < pattern.Length && pattern [p] == str [s]) {
			++p;
			++s;
		} else if (star != -1) {
			// Mismatch: let the last star swallow one more character.
			p = star + 1;
			s = ++resume;
		} else {
			return false;
		}
	}

	// Only stars may remain in the pattern once str is exhausted.
	while (p < pattern.Length && pattern [p] == '*')
		++p;

	return p == pattern.Length;
}
255 // FIXME: how do we do this operation in a culture-neutral way?
256 static public string[] SplitQuoted (string str
)
258 char[] specialChars
= new char [2] { ' ', '"' }
;
260 ArrayList array
= new ArrayList ();
263 while ((i
= str
.IndexOfAny (specialChars
)) != -1) {
264 if (str
[i
] == ' ') {
266 array
.Add (str
.Substring (0, i
));
267 str
= str
.Substring (i
+1);
268 } else if (str
[i
] == '"') {
269 int j
= str
.IndexOf ('"', i
+1);
271 array
.Add (str
.Substring (0, i
));
273 if (i
+1 < str
.Length
)
274 array
.Add (str
.Substring (i
+1));
278 array
.Add (str
.Substring (i
+1, j
-i
-1));
279 str
= str
.Substring (j
+1);
286 string [] retval
= new string [array
.Count
];
287 for (i
= 0; i
< array
.Count
; ++i
)
288 retval
[i
] = (string) array
[i
];
// Reports whether str contains at least one character for which
// Char.IsWhiteSpace is true.
static public bool ContainsWhiteSpace (string str)
{
	for (int idx = 0; idx < str.Length; ++idx) {
		if (Char.IsWhiteSpace (str [idx]))
			return true;
	}

	return false;
}
// ASCII characters that HexEscape always percent-encodes.
static char[] CharsToQuote = { ';', '?', ':', '@', '&', '=', '$', ',', '#', '%', '"', ' ' };

// Percent-encodes str: characters listed in CharsToQuote and every
// non-ASCII character are written as %XX sequences of their UTF-8
// bytes; all other ASCII characters are copied through unchanged.
//
// The string is encoded to UTF-8 as a whole before escaping.  Encoding
// one char at a time would split surrogate pairs, turning characters
// outside the Basic Multilingual Plane into replacement characters.
static public string HexEscape (string str)
{
	StringBuilder builder = new StringBuilder ();

	foreach (byte b in Encoding.UTF8.GetBytes (str)) {
		if (b >= 128) {
			// Part of a multi-byte sequence; always two hex digits.
			builder.AppendFormat ("%{0:X2}", b);
		} else if (Array.IndexOf (CharsToQuote, (char) b) != -1) {
			builder.Append (Uri.HexEscape ((char) b));
		} else {
			builder.Append ((char) b);
		}
	}

	return builder.ToString ();
}
325 // Translate all %xx codes into real characters
// Decodes %XX sequences in str.  The literal text between escapes is
// encoded as UTF-8, each escape contributes the single byte returned by
// Uri.HexUnescape, and the combined byte sequence is decoded as UTF-8.
static public string HexUnescape (string str)
{
	ArrayList decoded = new ArrayList ();
	int cursor = 0;

	for (;;) {
		int percent = str.IndexOf ('%', cursor);
		if (percent == -1)
			break;

		// Literal run preceding the escape.
		decoded.AddRange (Encoding.UTF8.GetBytes (str.Substring (cursor, percent - cursor)));

		// Uri.HexUnescape advances cursor past whatever it consumed.
		cursor = percent;
		decoded.Add ((byte) Uri.HexUnescape (str, ref cursor));
	}

	// Whatever follows the last escape.
	decoded.AddRange (Encoding.UTF8.GetBytes (str.Substring (cursor)));

	byte[] raw = (byte[]) decoded.ToArray (typeof (byte));
	return Encoding.UTF8.GetString (raw);
}
347 // These strings should never be exposed to the user.
349 static object uidLock
= new object ();
350 static public string GetUniqueId ()
354 Random r
= new Random ();
359 return string.Format ("{0}-{1}-{2}-{3}",
360 Environment
.GetEnvironmentVariable ("USER"),
361 Environment
.GetEnvironmentVariable ("HOST"),
367 static string [] replacements
= new string [] {
368 "&", "<", ">", """, "'",
371 static private StringBuilder cachedStringBuilder
;
372 static private char QuoteChar
= '\"';
374 private static bool IsInvalid (int ch
)
398 static public string EscapeStringForHtml (string source
, bool skipQuotations
)
402 int count
= source
.Length
;
404 for (int i
= 0; i
< count
; i
++) {
405 switch (source
[i
]) {
406 case '&': pos
= 0; break;
407 case '<': pos
= 1; break;
408 case '>': pos
= 2; break;
410 if (skipQuotations
) continue;
411 if (QuoteChar
== '\'') continue;
414 if (skipQuotations
) continue;
415 if (QuoteChar
== '\"') continue;
418 if (skipQuotations
) continue;
421 if (skipQuotations
) continue;
424 if (IsInvalid (source
[i
])) {
425 invalid
= source
[i
];
432 if (cachedStringBuilder
== null)
433 cachedStringBuilder
= new StringBuilder
435 cachedStringBuilder
.Append (source
.Substring (start
, i
- start
));
437 cachedStringBuilder
.Append ("&#x");
438 if (invalid
< (char) 255)
439 cachedStringBuilder
.Append (((int) invalid
).ToString ("X02", CultureInfo
.InvariantCulture
));
441 cachedStringBuilder
.Append (((int) invalid
).ToString ("X04", CultureInfo
.InvariantCulture
));
442 cachedStringBuilder
.Append (";");
445 cachedStringBuilder
.Append (replacements
[pos
]);
450 else if (start
< count
)
451 cachedStringBuilder
.Append (source
.Substring (start
, count
- start
));
452 string s
= cachedStringBuilder
.ToString ();
453 cachedStringBuilder
.Length
= 0;
457 static public string CleanupInvalidXmlCharacters (string str
)
462 int len
= str
.Length
;
464 // Find the first invalid character in the string
466 while (i
< len
&& ! IsInvalid (str
[i
]))
469 // If the string doesn't contain invalid characters,
474 // Otherwise copy the first chunk, then go through
475 // character by character looking for more invalid stuff.
477 char [] char_array
= new char[len
];
479 for (int j
= 0; j
< i
; ++j
)
480 char_array
[j
] = str
[j
];
481 char_array
[i
] = ' ';
483 for (int j
= i
+1; j
< len
; ++j
) {
486 char_array
[j
] = ' ';
491 return new string (char_array
);
494 // Words of less than min_word_length characters are not counted
495 static public int CountWords (string str
, int max_words
, int min_word_length
)
500 bool last_was_white
= true;
502 int word_start_pos
= -1;
504 for (int i
= 0; i
< str
.Length
; ++i
) {
505 if (Char
.IsWhiteSpace (str
[i
])) {
506 // if just seen word is too short, ignore it
507 if (! last_was_white
&& (i
- word_start_pos
< min_word_length
))
509 last_was_white
= true;
511 if (last_was_white
) {
514 if (max_words
> 0 && words
>= max_words
)
517 last_was_white
= false;
// Counts words in str, stopping at max_words, with no minimum word
// length (the three-argument overload is called with -1).
static public int CountWords (string str, int max_words)
{
	const int no_minimum_length = -1;
	return CountWords (str, max_words, no_minimum_length);
}

// Counts words in str, passing -1 as max_words to the overload above.
static public int CountWords (string str)
{
	const int no_word_limit = -1;
	return CountWords (str, no_word_limit);
}
// Collapses a run of leading slashes into a single one and removes
// trailing slashes.  The root path "/" is returned unchanged.
static public string SanitizePath (string path)
{
	if (path.StartsWith ("//")) {
		// Skip past every leading '/', then back up one so that
		// exactly one of them is kept.
		int first_non_slash = 2;
		while (first_non_slash < path.Length && path [first_non_slash] == '/')
			++first_non_slash;

		path = path.Substring (first_non_slash - 1);
	}

	if (path != "/")
		path = path.TrimEnd ('/');

	return path;
}
551 // This method will translate an email address like
552 // "john.doe+spamtrap@foo.com" to "john doe spamtrap foo"
554 // FIXME: Maybe we should only do the username part? Ie,
555 // "john doe spamtrap"? That way searching for "foo" won't
556 // turn up *everything*
// Turns an email address into a space-separated list of lowercase
// tokens: the address is lowercased, split on '@', '.', '-', '_' and
// '+', and a trailing common top-level domain is dropped.
// Returns null when email is null.
static public string SanitizeEmail (string email)
{
	char[] replace_array = { '@', '.', '-', '_', '+' };
	string[] tlds = { "com", "net", "org", "edu", "gov", "mil" }; // Just the Big Six

	if (email == null)
		return null;

	// Invariant lowercasing: culture-sensitive ToLower () maps 'I' to a
	// dotless i under Turkish locales, which would change the tokens.
	email = email.ToLowerInvariant ();

	string[] tmp = email.Split (replace_array);
	email = String.Join (" ", tmp);

	foreach (string tld in tlds) {
		string suffix = " " + tld;
		if (email.EndsWith (suffix, StringComparison.Ordinal)) {
			// Derive the cut from the suffix itself rather than
			// hard-coding a length that only fits 3-letter TLDs.
			email = email.Substring (0, email.Length - suffix.Length);
			break;
		}
	}

	return email;
}
581 * expands environment variables in a string e.g.
582 * folders=$HOME/.kde/share/...
584 public static string ExpandEnvVariables (string path
)
586 int dollar_pos
= path
.IndexOf ('$');
587 if (dollar_pos
== -1)
590 System
.Text
.StringBuilder sb
=
591 new System
.Text
.StringBuilder ( (dollar_pos
== 0 ? "" : path
.Substring (0, dollar_pos
)));
593 while (dollar_pos
!= -1 && dollar_pos
+ 1 < path
.Length
) {
594 // FIXME: kconfigbase.cpp contains an additional case, $(expression)/.kde/...
595 // Ignoring such complicated expressions for now. Volunteers ;) ?
596 int end_pos
= dollar_pos
;
597 if (path
[dollar_pos
+ 1] != '$') {
600 if (path
[end_pos
] == '{') {
601 while ((end_pos
< path
.Length
) &&
602 (path
[end_pos
] != '}'))
605 var_name
= path
.Substring (dollar_pos
+ 2, end_pos
- dollar_pos
- 3);
607 while ((end_pos
< path
.Length
) &&
608 (Char
.IsNumber (path
[end_pos
]) ||
609 Char
.IsLetter (path
[end_pos
]) ||
610 path
[end_pos
] == '_'))
612 var_name
= path
.Substring (dollar_pos
+ 1, end_pos
- dollar_pos
- 1);
614 string value_env
= null;
615 if (var_name
!= String
.Empty
)
616 value_env
= Environment
.GetEnvironmentVariable (var_name
);
617 if (value_env
!= null) {
618 sb
.Append (value_env
);
620 // else, no environment variable with that name exists. ignore
621 }else // else, ignore the first '$', second one will be expanded
623 if (end_pos
>= path
.Length
)
625 dollar_pos
= path
.IndexOf ('$', end_pos
);
626 if (dollar_pos
== -1) {
627 sb
.Append (path
.Substring (end_pos
));
629 sb
.Append (path
.Substring (end_pos
, dollar_pos
- end_pos
));
633 return sb
.ToString ();
636 public static string StripTags (string line
, StringBuilder builder
)
638 int first
= line
.IndexOf ('<');
645 while (i
< line
.Length
) {
649 j
= line
.IndexOf ('<', i
);
657 k
= line
.IndexOf ('>', j
);
659 // If a "<" is unmatched, preserve it, and the
666 builder
.Append (line
, i
, line
.Length
- i
);
670 builder
.Append (line
, i
, j
-i
);
675 return builder
.ToString ();
// Convenience overload: strips tags from line using a freshly
// allocated StringBuilder as the scratch buffer.
public static string StripTags (string line)
{
	return StripTags (line, new StringBuilder ());
}
// Replaces the HTML entities &lt;, &gt;, &quot;, &nbsp; and &amp; in
// line with the characters they stand for and returns the result.
public static string ConvertSpecialEntities (string line)
{
	// Strings are immutable: Replace returns a new string, so each
	// result has to be assigned back or the call has no effect.
	line = line.Replace ("&lt;", "<");
	line = line.Replace ("&gt;", ">");
	line = line.Replace ("&quot;", "\"");
	line = line.Replace ("&nbsp;", " ");

	// Decode &amp; last so that text such as "&amp;lt;" becomes the
	// literal "&lt;" instead of being unescaped a second time.
	line = line.Replace ("&amp;", "&");

	return line;
}
696 public class HtmlRemovingReader
: TextReader
{
698 private TextReader reader
;
699 private StringBuilder sb
;
701 public HtmlRemovingReader (TextReader reader
)
703 this.reader
= reader
;
704 this.sb
= new StringBuilder ();
707 public override string ReadLine ()
709 string line
= reader
.ReadLine ();
715 line
= StringFu
.StripTags (line
, sb
);
716 line
= StringFu
.ConvertSpecialEntities (line
);
721 public override void Close ()