4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
29 using System
.Globalization
;
36 namespace Beagle
.Util
{
38 public class StringFu
{
40 private StringFu () { }
// class is static
42 public const string UnindexedNamespace
= "_unindexed:";
44 private const String TimeFormat
= "yyyyMMddHHmmss";
// FIXME: Fix all the UTC and timezone hacks when switching to .Net-2.0
46 private const String LocalTimeFormat
= "yyyyMMddHHmmsszz";
// Serializes a timestamp using the compact TimeFormat pattern
// ("yyyyMMddHHmmss").  No time zone conversion is applied to dt.
//
// The invariant culture is passed explicitly: StringToDateTime parses
// these strings with CultureInfo.InvariantCulture, and formatting with
// the current culture would produce a different year under cultures
// whose default calendar is not Gregorian, breaking the round trip.
static public string DateTimeToString (DateTime dt)
{
	return dt.ToString (TimeFormat, CultureInfo.InvariantCulture);
}
// Returns the year and month of dt as a six-character "yyyyMM" string.
//
// Formatted with the invariant culture so the value does not depend on
// the calendar of the current thread's culture (the custom format
// specifiers are otherwise resolved against the current culture).
static public string DateTimeToYearMonthString (DateTime dt)
{
	return dt.ToString ("yyyyMM", CultureInfo.InvariantCulture);
}
// Returns the day of the month of dt as a two-digit "dd" string.
//
// Formatted with the invariant culture so the value does not depend on
// the calendar of the current thread's culture.
static public string DateTimeToDayString (DateTime dt)
{
	return dt.ToString ("dd", CultureInfo.InvariantCulture);
}
// Parses a timestamp string back into a DateTime.
//
// A null or empty string yields a default-constructed DateTime.
// Otherwise "+00" is appended and the result is parsed against
// LocalTimeFormat with the invariant culture.
static public DateTime StringToDateTime (string str)
{
	bool nothing_to_parse = (str == null) || (str == "");
	if (nothing_to_parse)
		return new DateTime ();

	// The explicit "+00" offset matters: if no timezone is present,
	// ParseExact uses the local time zone.  Without it, a value such as
	// 20061107173446 (which is stored in UTC) used to be parsed as
	// 2006-11-07T17:34:46.0000000-05:00.
	string with_offset = string.Concat (str, "+00");

	return DateTime.ParseExact (with_offset,
				    LocalTimeFormat,
				    CultureInfo.InvariantCulture);
}
78 static public string DateTimeToFuzzy (DateTime dt
)
80 DateTime today
= DateTime
.Today
;
81 TimeSpan sinceToday
= today
- dt
;
83 string date
= null, time
= null;
85 if (sinceToday
.TotalDays
<= 0)
86 date
= Catalog
.GetString ("Today");
87 else if (sinceToday
.TotalDays
< 1)
88 date
= Catalog
.GetString ("Yesterday");
89 else if (today
.Year
== dt
.Year
)
90 /* Translators: Example output: Aug 9 */
91 date
= dt
.ToString (Catalog
.GetString ("MMM d"));
93 /* Translators: Example output: Aug 9, 2000 */
94 date
= dt
.ToString (Catalog
.GetString ("MMM d, yyyy"));
96 /* Translators: Example output: 11:05 AM (note h = 12-hour time) */
97 time
= dt
.ToString (Catalog
.GetString ("h:mm tt"));
101 if (date
!= null && time
!= null)
102 /* Translators: {0} is a date (e.g. 'Today' or 'Apr 23'), {1} is the time */
103 fuzzy
= String
.Format (Catalog
.GetString ("{0}, {1}"), date
, time
);
104 else if (date
!= null)
112 public static string DateTimeToPrettyString (DateTime date
)
114 DateTime now
= DateTime
.Now
;
115 string short_time
= date
.ToShortTimeString ();
117 if (date
.Year
== now
.Year
) {
118 if (date
.DayOfYear
== now
.DayOfYear
) {
119 /* To translators: {0} is the time of the day, eg. 13:45 */
120 return String
.Format (Catalog
.GetString ("Today, {0}"), short_time
);
121 } else if (date
.DayOfYear
== now
.DayOfYear
- 1) {
122 /* To translators: {0} is the time of the day, eg. 13:45 */
123 return String
.Format (Catalog
.GetString ("Yesterday, {0}"), short_time
);
124 } else if (date
.DayOfYear
> now
.DayOfYear
- 6 && date
.DayOfYear
< now
.DayOfYear
) {
125 /* To translators: {0} is the number of days that have passed, {1} is the time of the day, eg. 13:45 */
126 return String
.Format (Catalog
.GetString ("{0} days ago, {1}"),
127 now
.DayOfYear
- date
.DayOfYear
,
130 /* Translators: Example output: January 3, 3:45 PM */
131 return date
.ToString (Catalog
.GetString ("MMMM d, h:mm tt"));
135 /* Translators: Example output: March 23 2001, 10:04 AM */
136 return date
.ToString (Catalog
.GetString ("MMMM d yyyy, h:mm tt"));
139 public static string DurationToPrettyString (DateTime end_time
, DateTime start_time
)
141 TimeSpan span
= end_time
- start_time
;
143 string span_str
= "";
145 if (span
.Hours
> 0) {
146 span_str
= String
.Format (Catalog
.GetPluralString ("{0} hour", "{0} hours", span
.Hours
), span
.Hours
);
148 if (span
.Minutes
> 0)
152 if (span
.Minutes
> 0) {
153 span_str
+= String
.Format (Catalog
.GetPluralString ("{0} minute", "{0} minutes", span
.Minutes
), span
.Minutes
);
160 static public string FileLengthToString (long len
)
162 const long oneMb
= 1024*1024;
165 return "*BadLength*";
168 /* Translators: {0} is a file size in bytes */
169 return String
.Format (Catalog
.GetString ("{0} bytes"), len
);
172 /* Translators: {0} is a file size in kilobytes */
173 return String
.Format (Catalog
.GetString ("{0:0.0} KB"), len
/(double)1024);
175 /* Translators: {0} is a file size in megabytes */
176 return String
.Format (Catalog
.GetString ("{0:0.0} MB"), len
/(double)oneMb
);
180 // (1) Replace non-alphanumeric characters with spaces
181 // (2) Inject whitespace between lowercase-to-uppercase
182 // transitions (so "FooBar" becomes "Foo Bar")
183 // and transitions between letters and numbers
184 // (so "cvs2svn" becomes "cvs 2 svn")
185 static public string FuzzyDivide (string line
)
187 // Allocate a space slightly bigger than the
189 StringBuilder builder
;
190 builder
= new StringBuilder (line
.Length
+ 4);
193 bool last_was_space
= true; // don't start w/ a space
194 for (int i
= 0; i
< line
.Length
; ++i
) {
197 if (Char
.IsLetterOrDigit (c
)) {
198 if (Char
.IsUpper (c
))
200 else if (Char
.IsLower (c
))
202 if (this_case
!= prev_case
203 && !(this_case
== -1 && prev_case
== +1)) {
204 if (! last_was_space
) {
205 builder
.Append (' ');
206 last_was_space
= true;
210 if (c
!= ' ' || !last_was_space
) {
212 last_was_space
= (c
== ' ');
215 prev_case
= this_case
;
217 if (! last_was_space
) {
218 builder
.Append (' ');
219 last_was_space
= true;
225 return builder
.ToString ();
// Runs FuzzyDivide over a URL with its "scheme://" prefix removed.
//
// url: the URL text; everything up to and including the first "://"
//      is discarded before dividing.
// Returns whatever FuzzyDivide produces for the remaining text.
public static string UrlFuzzyDivide (string url)
{
	int protocol_index = url.IndexOf ("://");

	// IndexOf returns -1 when there is no "://".  Blindly adding 3 would
	// then call Substring (2), silently dropping the first two characters
	// (or throwing for inputs shorter than two characters), so divide the
	// whole string instead.
	if (protocol_index == -1)
		return FuzzyDivide (url);

	return FuzzyDivide (url.Substring (protocol_index + 3));
}
234 // Match strings against patterns that are allowed to contain
235 // glob-style * wildcards.
236 // This recursive implementation is not particularly efficient,
237 // and probably will fail for weird corner cases.
238 static public bool GlobMatch (string pattern
, string str
)
240 if (pattern
== null || str
== null)
245 else if (pattern
.StartsWith ("**"))
246 return GlobMatch (pattern
.Substring (1), str
);
247 else if (str
== "" && pattern
!= "")
250 int i
= pattern
.IndexOf ('*');
252 return pattern
== str
;
253 else if (i
> 0 && i
< str
.Length
)
254 return pattern
.Substring (0, i
) == str
.Substring (0, i
)
255 && GlobMatch (pattern
.Substring (i
), str
.Substring (i
));
257 return GlobMatch (pattern
.Substring (1), str
.Substring (1))
258 || GlobMatch (pattern
.Substring (1), str
)
259 || GlobMatch (pattern
, str
.Substring (1));
264 // FIXME: how do we do this operation in a culture-neutral way?
265 static public string[] SplitQuoted (string str
)
267 char[] specialChars
= new char [2] { ' ', '"' }
;
269 ArrayList array
= new ArrayList ();
272 while ((i
= str
.IndexOfAny (specialChars
)) != -1) {
273 if (str
[i
] == ' ') {
275 array
.Add (str
.Substring (0, i
));
276 str
= str
.Substring (i
+1);
277 } else if (str
[i
] == '"') {
278 int j
= str
.IndexOf ('"', i
+1);
280 array
.Add (str
.Substring (0, i
));
282 if (i
+1 < str
.Length
)
283 array
.Add (str
.Substring (i
+1));
287 array
.Add (str
.Substring (i
+1, j
-i
-1));
288 str
= str
.Substring (j
+1);
295 string [] retval
= new string [array
.Count
];
296 for (i
= 0; i
< array
.Count
; ++i
)
297 retval
[i
] = (string) array
[i
];
301 static public bool ContainsWhiteSpace (string str
)
303 foreach (char c
in str
)
304 if (char.IsWhiteSpace (c
))
309 static char[] CharsToQuote
= { ';', '?', ':', '@', '&', '=', '$', ',', '#', '%', '"', ' ' }
;
311 static public string HexEscape (string str
)
313 StringBuilder builder
= new StringBuilder ();
315 foreach (char c
in str
) {
317 if (ArrayFu
.IndexOfChar (CharsToQuote
, c
) != -1)
318 builder
.Append (Uri
.HexEscape (c
));
324 utf8_bytes
= Encoding
.UTF8
.GetBytes (new char [] { c }
);
326 foreach (byte b
in utf8_bytes
)
327 builder
.AppendFormat ("%{0:X}", b
);
331 return builder
.ToString ();
334 // Translate all %xx codes into real characters
335 static public string HexUnescape (string str
)
337 ArrayList bytes
= new ArrayList ();
341 while ((i
= str
.IndexOf ('%', pos
)) != -1) {
342 sub_bytes
= Encoding
.UTF8
.GetBytes (str
.Substring (pos
, i
- pos
));
343 bytes
.AddRange (sub_bytes
);
346 char unescaped
= Uri
.HexUnescape (str
, ref pos
);
347 bytes
.Add ((byte) unescaped
);
350 sub_bytes
= Encoding
.UTF8
.GetBytes (str
.Substring (pos
, str
.Length
- pos
));
351 bytes
.AddRange (sub_bytes
);
353 return Encoding
.UTF8
.GetString ((byte[]) bytes
.ToArray (typeof (byte)));
356 // These strings should never be exposed to the user.
358 static object uidLock
= new object ();
359 static public string GetUniqueId ()
363 Random r
= new Random ();
368 return string.Format ("{0}-{1}-{2}-{3}",
369 Environment
.GetEnvironmentVariable ("USER"),
370 Environment
.GetEnvironmentVariable ("HOST"),
376 static string [] replacements
= new string [] {
377 "&", "<", ">", """, "'",
380 static private StringBuilder cachedStringBuilder
;
381 static private char QuoteChar
= '\"';
383 private static bool IsInvalid (int ch
)
407 static public string EscapeStringForHtml (string source
, bool skipQuotations
)
411 int count
= source
.Length
;
413 for (int i
= 0; i
< count
; i
++) {
414 switch (source
[i
]) {
415 case '&': pos
= 0; break;
416 case '<': pos
= 1; break;
417 case '>': pos
= 2; break;
419 if (skipQuotations
) continue;
420 if (QuoteChar
== '\'') continue;
423 if (skipQuotations
) continue;
424 if (QuoteChar
== '\"') continue;
427 if (skipQuotations
) continue;
430 if (skipQuotations
) continue;
433 if (IsInvalid (source
[i
])) {
434 invalid
= source
[i
];
441 if (cachedStringBuilder
== null)
442 cachedStringBuilder
= new StringBuilder
444 cachedStringBuilder
.Append (source
.Substring (start
, i
- start
));
446 cachedStringBuilder
.Append ("&#x");
447 if (invalid
< (char) 255)
448 cachedStringBuilder
.Append (((int) invalid
).ToString ("X02", CultureInfo
.InvariantCulture
));
450 cachedStringBuilder
.Append (((int) invalid
).ToString ("X04", CultureInfo
.InvariantCulture
));
451 cachedStringBuilder
.Append (";");
454 cachedStringBuilder
.Append (replacements
[pos
]);
459 else if (start
< count
)
460 cachedStringBuilder
.Append (source
.Substring (start
, count
- start
));
461 string s
= cachedStringBuilder
.ToString ();
462 cachedStringBuilder
.Length
= 0;
466 static public string CleanupInvalidXmlCharacters (string str
)
471 int len
= str
.Length
;
473 // Find the first invalid character in the string
475 while (i
< len
&& ! IsInvalid (str
[i
]))
478 // If the string doesn't contain invalid characters,
483 // Otherwise copy the first chunk, then go through
484 // character by character looking for more invalid stuff.
486 char [] char_array
= new char[len
];
488 for (int j
= 0; j
< i
; ++j
)
489 char_array
[j
] = str
[j
];
490 char_array
[i
] = ' ';
492 for (int j
= i
+1; j
< len
; ++j
) {
495 char_array
[j
] = ' ';
500 return new string (char_array
);
503 // Words of less than min_word_length characters are not counted
504 static public int CountWords (string str
, int max_words
, int min_word_length
)
509 bool last_was_white
= true;
511 int word_start_pos
= -1;
513 for (int i
= 0; i
< str
.Length
; ++i
) {
514 if (Char
.IsWhiteSpace (str
[i
])) {
515 // if just seen word is too short, ignore it
516 if (! last_was_white
&& (i
- word_start_pos
< min_word_length
))
518 last_was_white
= true;
520 if (last_was_white
) {
523 if (max_words
> 0 && words
>= max_words
)
526 last_was_white
= false;
// Convenience overload: forwards to the three-argument CountWords,
// passing -1 as min_word_length.
static public int CountWords (string str, int max_words)
{
	const int no_min_word_length = -1;

	int word_count = CountWords (str, max_words, no_min_word_length);
	return word_count;
}
// Convenience overload: forwards to the two-argument CountWords,
// passing -1 as max_words.
static public int CountWords (string str)
{
	const int no_max_words = -1;

	int word_count = CountWords (str, no_max_words);
	return word_count;
}
543 // Strip trailing slashes and make sure we only have 1 leading slash
544 static public string SanitizePath (string path
)
546 if (path
.StartsWith ("//")) {
548 for (pos
= 2; pos
< path
.Length
; pos
++)
549 if (path
[pos
] != '/')
552 path
= path
.Substring (pos
- 1);
554 if (!(path
.Length
== 1 && path
[0] == '/'))
555 path
= path
.TrimEnd ('/');
560 // This method will translate an email address like
561 // "john.doe+spamtrap@foo.com" to "john doe spamtrap foo"
563 // FIXME: Maybe we should only do the username part? Ie,
564 // "john doe spamtrap"? That way searching for "foo" won't
565 // turn up *everything*
566 static public string SanitizeEmail (string email
)
568 char[] replace_array
= { '@', '.', '-', '_', '+' }
;
569 string[] tlds
= { "com", "net", "org", "edu", "gov", "mil" }
; // Just the Big Six
574 email
= email
.ToLower ();
576 string[] tmp
= email
.Split (replace_array
);
577 email
= String
.Join (" ", tmp
);
579 foreach (string tld
in tlds
) {
580 if (email
.EndsWith (" " + tld
)) {
581 email
= email
.Substring (0, email
.Length
- 4);
590 * expands environment variables in a string e.g.
591 * folders=$HOME/.kde/share/...
593 public static string ExpandEnvVariables (string path
)
595 int dollar_pos
= path
.IndexOf ('$');
596 if (dollar_pos
== -1)
599 System
.Text
.StringBuilder sb
=
600 new System
.Text
.StringBuilder ( (dollar_pos
== 0 ? "" : path
.Substring (0, dollar_pos
)));
602 while (dollar_pos
!= -1 && dollar_pos
+ 1 < path
.Length
) {
603 // FIXME: kconfigbase.cpp contains an additional case, $(expression)/.kde/...
604 // Ignoring such complicated expressions for now. Volunteers ;) ?
605 int end_pos
= dollar_pos
;
606 if (path
[dollar_pos
+ 1] != '$') {
609 if (path
[end_pos
] == '{') {
610 while ((end_pos
< path
.Length
) &&
611 (path
[end_pos
] != '}'))
614 var_name
= path
.Substring (dollar_pos
+ 2, end_pos
- dollar_pos
- 3);
616 while ((end_pos
< path
.Length
) &&
617 (Char
.IsNumber (path
[end_pos
]) ||
618 Char
.IsLetter (path
[end_pos
]) ||
619 path
[end_pos
] == '_'))
621 var_name
= path
.Substring (dollar_pos
+ 1, end_pos
- dollar_pos
- 1);
623 string value_env
= null;
624 if (var_name
!= String
.Empty
)
625 value_env
= Environment
.GetEnvironmentVariable (var_name
);
626 if (value_env
!= null) {
627 sb
.Append (value_env
);
629 // else, no environment variable with that name exists. ignore
630 }else // else, ignore the first '$', second one will be expanded
632 if (end_pos
>= path
.Length
)
634 dollar_pos
= path
.IndexOf ('$', end_pos
);
635 if (dollar_pos
== -1) {
636 sb
.Append (path
.Substring (end_pos
));
638 sb
.Append (path
.Substring (end_pos
, dollar_pos
- end_pos
));
642 return sb
.ToString ();
645 public static string StripTags (string line
, StringBuilder builder
)
647 int first
= line
.IndexOf ('<');
654 while (i
< line
.Length
) {
658 j
= line
.IndexOf ('<', i
);
666 k
= line
.IndexOf ('>', j
);
668 // If a "<" is unmatched, preserve it, and the
675 builder
.Append (line
, i
, line
.Length
- i
);
679 builder
.Append (line
, i
, j
-i
);
684 return builder
.ToString ();
// Convenience overload: calls the two-argument StripTags with a freshly
// allocated StringBuilder, for callers that have no builder to pass in.
public static string StripTags (string line)
{
	return StripTags (line, new StringBuilder ());
}
// Replaces a handful of HTML character entities in line with the
// characters they stand for: &lt; &gt; &quot; &nbsp; and &amp;.
//
// line: the text to decode; null or empty input is returned unchanged.
// Returns the decoded text.
public static string ConvertSpecialEntities (string line)
{
	if (line == null || line.Length == 0)
		return line;

	// Strings are immutable: String.Replace returns a new string, so each
	// result must be assigned back.  Discarding it makes the call a no-op.
	line = line.Replace ("&lt;", "<");
	line = line.Replace ("&gt;", ">");
	line = line.Replace ("&quot;", "\"");
	line = line.Replace ("&nbsp;", " ");

	// Decode &amp; last so that an escaped entity such as "&amp;lt;"
	// becomes the literal text "&lt;" instead of being decoded twice.
	line = line.Replace ("&amp;", "&");

	return line;
}
705 public class HtmlRemovingReader
: TextReader
{
707 private TextReader reader
;
708 private StringBuilder sb
;
710 public HtmlRemovingReader (TextReader reader
)
712 this.reader
= reader
;
713 this.sb
= new StringBuilder ();
716 public override string ReadLine ()
718 string line
= reader
.ReadLine ();
724 line
= StringFu
.StripTags (line
, sb
);
725 line
= StringFu
.ConvertSpecialEntities (line
);
730 public override void Close ()