Compute lucene-style scores for our hits.
[beagle.git] / Util / StringFu.cs
blobf50e5646f6b9744ada686d1b1ec3150c4092d181
1 //
2 // StringFu.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.Globalization;
30 using System.IO;
31 using System.Text;
32 using System.Xml;
34 using Mono.Posix;
36 namespace Beagle.Util {
38 public class StringFu {
40 private StringFu () { } // class is static
// Layout used to serialize timestamps: 14 digits, year first, so the
// strings sort chronologically.
private const String timeFormat = "yyyyMMddHHmmss";

// Serializes a timestamp using timeFormat.
// The invariant culture is used so that the digits written never depend
// on the calendar of the user's locale; this is a storage format, not
// text shown to the user.
static public string DateTimeToString (DateTime dt)
{
	return dt.ToString (timeFormat, CultureInfo.InvariantCulture);
}

// Parses a string produced by DateTimeToString.
// A null or empty string yields the default DateTime value.  Any other
// string that does not match timeFormat makes DateTime.ParseExact throw
// a FormatException.
// NOTE(review): strings written by an older build while running under a
// non-Gregorian culture would now parse to a different year - confirm
// that no such data exists.
static public DateTime StringToDateTime (string str)
{
	if (str == null || str == "")
		return new DateTime ();

	return DateTime.ParseExact (str, timeFormat, CultureInfo.InvariantCulture);
}
// Formats a timestamp for display as "<date>, <time>", where the date
// part is "Today", "Yesterday", a month/day (same year as today) or a
// month/day/year.
// The comparison is done on calendar days, so a timestamp at exactly
// midnight yesterday still reads "Yesterday", and a timestamp on a
// later day than today gets a real date instead of "Today".
static public string DateTimeToFuzzy (DateTime dt)
{
	DateTime today = DateTime.Today;

	// Both operands are midnight values, so this is a whole number of
	// days: 0 for today, 1 for yesterday, negative for the future.
	int days_ago = (today - dt.Date).Days;

	string date;
	if (days_ago == 0)
		date = Catalog.GetString ("Today");
	else if (days_ago == 1)
		date = Catalog.GetString ("Yesterday");
	else if (today.Year == dt.Year)
		date = dt.ToString (Catalog.GetString ("MMM d"));
	else
		date = dt.ToString (Catalog.GetString ("MMM d, yyyy"));

	string time = dt.ToString (Catalog.GetString ("h:mm tt"));

	/* Translators: {0} is a date (e.g. 'Today' or 'Apr 23'), {1} is the time */
	return String.Format (Catalog.GetString ("{0}, {1}"), date, time);
}
// Formats a timestamp relative to the current date:
//   same day            -> "Today, <time>"
//   one day earlier     -> "Yesterday, <time>"
//   2 to 5 days earlier -> "<n> days ago, <time>"
//   otherwise           -> month and day (plus the year when it differs
//                          from the current year) followed by the time.
// The day distance is computed from calendar dates rather than from
// DayOfYear, so it stays correct across a year boundary (December 31st
// seen on January 1st is "Yesterday").
public static string DateTimeToPrettyString (DateTime date)
{
	DateTime now = DateTime.Now;
	string short_time = date.ToShortTimeString ();

	// Whole calendar days between the two dates; negative when the
	// timestamp lies on a later day than today.
	int days_ago = (now.Date - date.Date).Days;

	if (days_ago == 0) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Today, {0}"), short_time);
	}

	if (days_ago == 1) {
		/* To translators: {0} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("Yesterday, {0}"), short_time);
	}

	if (days_ago > 1 && days_ago < 6) {
		/* To translators: {0} is the number of days that have passed, {1} is the time of the day, eg. 13:45 */
		return String.Format (Catalog.GetString ("{0} days ago, {1}"),
				      days_ago,
				      short_time);
	}

	if (date.Year == now.Year)
		return date.ToString (Catalog.GetString ("MMMM d, h:mm tt"));

	return date.ToString (Catalog.GetString ("MMMM d yyyy, h:mm tt"));
}
// Describes the time elapsed between start_time and end_time as
// "<h> hours, <m> minutes", leaving out a part that is zero.
// Seconds are ignored.  A zero or negative duration gives "".
public static string DurationToPrettyString (DateTime end_time, DateTime start_time)
{
	TimeSpan span = end_time - start_time;

	// TimeSpan.Hours is only the 0-23 component of the span; whole
	// days have to be folded in or a 25 hour span reads "1 hour".
	int hours = span.Days * 24 + span.Hours;
	int minutes = span.Minutes;

	string span_str = "";

	if (hours > 0) {
		span_str = String.Format (Catalog.GetPluralString ("{0} hour", "{0} hours", hours), hours);

		if (minutes > 0)
			span_str += ", ";
	}

	if (minutes > 0) {
		span_str += String.Format (Catalog.GetPluralString ("{0} minute", "{0} minutes", minutes), minutes);
	}

	return span_str;
}
// Renders a byte count for display: plain bytes below 1024, kilobytes
// with one decimal below 1024*1024, megabytes with one decimal above
// that.  A negative length gives the marker string "*BadLength*".
static public string FileLengthToString (long len)
{
	const long kilobyte = 1024;
	const long megabyte = 1024*1024;

	if (len < 0)
		return "*BadLength*";

	if (len >= megabyte) {
		/* Translators: {0} is a file size in megabytes */
		return String.Format (Catalog.GetString ("{0:0.0} MB"), len/(double)megabyte);
	}

	if (len >= kilobyte) {
		/* Translators: {0} is a file size in kilobytes */
		return String.Format (Catalog.GetString ("{0:0.0} KB"), len/(double)kilobyte);
	}

	/* Translators: {0} is a file size in bytes */
	return String.Format (Catalog.GetString ("{0} bytes"), len);
}
// Breaks an identifier-like string into space separated pieces:
// (1) every run of characters that are neither letters nor digits
//     collapses into a single space (none is emitted at the very start)
// (2) a space is inserted where the kind of character changes between
//     upper case, lower case and "neither" (digits), except when an
//     upper case letter is followed by a lower case one.  So "FooBar"
//     becomes "Foo Bar" and "cvs2svn" becomes "cvs 2 svn".
static public string FuzzyDivide (string line)
{
	// Leave a little room for the spaces we are going to insert.
	StringBuilder result = new StringBuilder (line.Length + 4);

	int previous_kind = 0;  // +1 upper, -1 lower, 0 anything else
	bool at_space = true;   // true at the start so we never lead with a space

	foreach (char c in line) {
		if (! Char.IsLetterOrDigit (c)) {
			// Separator: emit at most one space for a whole run.
			if (! at_space) {
				result.Append (' ');
				at_space = true;
			}
			previous_kind = 0;
			continue;
		}

		int kind = Char.IsUpper (c) ? +1 : (Char.IsLower (c) ? -1 : 0);

		// "Fo" is the start of a capitalized word, not a boundary.
		bool upper_then_lower = (previous_kind == +1 && kind == -1);

		if (kind != previous_kind && ! upper_then_lower && ! at_space)
			result.Append (' ');

		result.Append (c);
		at_space = false;
		previous_kind = kind;
	}

	return result.ToString ();
}
// Match strings against patterns that are allowed to contain
// glob-style * wildcards.
// '*' matches any run of characters, including an empty one; every
// other pattern character must match the string exactly (ordinal
// comparison).  The whole string has to be consumed for a match.
// The matcher is iterative: it remembers the most recent '*' and, on a
// mismatch, retries with that '*' swallowing one more character.
// Neither argument may be null.
static public bool GlobMatch (string pattern, string str)
{
	int p = 0;            // current position in pattern
	int s = 0;            // current position in str
	int star = -1;        // index of the last '*' seen in pattern, or -1
	int star_match = 0;   // position in str where that '*' stops matching

	while (s < str.Length) {
		if (p < pattern.Length && pattern [p] == '*') {
			// Start by letting the star match nothing.
			star = p++;
			star_match = s;
		} else if (p < pattern.Length && pattern [p] == str [s]) {
			++p;
			++s;
		} else if (star != -1) {
			// Backtrack: the last star absorbs one more character.
			p = star + 1;
			s = ++star_match;
		} else {
			return false;
		}
	}

	// The string is used up; only stars (matching nothing) may remain.
	while (p < pattern.Length && pattern [p] == '*')
		++p;

	return p == pattern.Length;
}
// Splits a string into tokens at spaces, treating text between a pair
// of double quotes as one token (spaces included, quotes dropped).
// Empty tokens are never produced.  A quote with no closing partner
// turns the rest of the string into a single token.
// FIXME: how do we do this operation in a culture-neutral way?
static public string[] SplitQuoted (string str)
{
	char[] delimiters = new char [2] { ' ', '"' };
	ArrayList tokens = new ArrayList ();

	int cursor = 0;  // start of the text not yet consumed
	while (cursor < str.Length) {
		int hit = str.IndexOfAny (delimiters, cursor);
		if (hit == -1)
			break;

		// Whatever precedes the delimiter is a token of its own.
		if (hit > cursor)
			tokens.Add (str.Substring (cursor, hit - cursor));

		if (str [hit] == ' ') {
			cursor = hit + 1;
			continue;
		}

		// Opening quote: look for its partner.
		int closing = str.IndexOf ('"', hit + 1);
		if (closing == -1) {
			if (hit + 1 < str.Length)
				tokens.Add (str.Substring (hit + 1));
			cursor = str.Length;
		} else {
			if (closing > hit + 1)
				tokens.Add (str.Substring (hit + 1, closing - hit - 1));
			cursor = closing + 1;
		}
	}

	// Trailing text without any delimiter in it.
	if (cursor < str.Length)
		tokens.Add (str.Substring (cursor));

	return (string []) tokens.ToArray (typeof (string));
}
// Returns true when at least one character of str is white space
// according to Char.IsWhiteSpace.
static public bool ContainsWhiteSpace (string str)
{
	for (int i = 0; i < str.Length; ++i) {
		if (Char.IsWhiteSpace (str [i]))
			return true;
	}

	return false;
}
// Characters that HexEscape replaces with their %xx form.
static char[] CharsToQuote = { ';', '?', ':', '@', '&', '=', '$', ',', '#', '%', '"', ' ' };

// Returns str with every character listed in CharsToQuote replaced by
// the escape sequence produced by Uri.HexEscape; all other characters
// are copied unchanged.
static public string HexEscape (string str)
{
	StringBuilder escaped = new StringBuilder ();

	int cursor = 0;  // start of the text not yet copied
	int hit;
	while ((hit = str.IndexOfAny (CharsToQuote, cursor)) != -1) {
		// Copy the clean stretch, then the escaped character.
		escaped.Append (str, cursor, hit - cursor);
		escaped.Append (Uri.HexEscape (str [hit]));
		cursor = hit + 1;
	}

	escaped.Append (str, cursor, str.Length - cursor);

	return escaped.ToString ();
}
// Translate all %xx codes into real characters.
// Each '%' is handed to UriFu.HexUnescape, and exactly the characters
// that call consumed are replaced by the character it returns.  Text
// produced by a decoded escape is never scanned again, so "%2541"
// becomes "%41", not "A".
// NOTE(review): this assumes UriFu.HexUnescape behaves like
// System.Uri.HexUnescape, i.e. it advances the index past what it
// consumed and returns a '%' that does not start a valid escape
// unchanged - confirm against UriFu.
static public string HexUnescape (string str)
{
	int percent = str.IndexOf ('%');
	if (percent == -1)
		return str;

	StringBuilder result = new StringBuilder (str.Length);
	int pos = 0;  // first character of str not yet copied

	while (percent != -1) {
		// Copy the literal text in front of the '%'.
		result.Append (str, pos, percent - pos);

		pos = percent;
		char unescaped = UriFu.HexUnescape (str, ref pos);

		// Guard against a helper that did not move the index
		// (or moved it too far), so the loop always progresses.
		if (pos <= percent)
			pos = percent + 1;
		if (pos > str.Length)
			pos = str.Length;

		result.Append (unescaped);

		percent = (pos < str.Length) ? str.IndexOf ('%', pos) : -1;
	}

	result.Append (str, pos, str.Length - pos);

	return result.ToString ();
}
// Builds a file URI string for a path: the path is made absolute with
// Path.GetFullPath, run through HexEscape, and prefixed with the file
// scheme and scheme delimiter.
static public string PathToQuotedFileUri (string path)
{
	string absolute = Path.GetFullPath (path);
	string prefix = Uri.UriSchemeFile + Uri.SchemeDelimiter;

	return prefix + HexEscape (absolute);
}
// Counter behind GetUniqueId; 0 means "not seeded yet".
static int uid = 0;
// Guards uid.
static object uidLock = new object ();

// Returns an identifier of the form "<USER>-<HOST>-<ticks>-<counter>",
// built from the USER and HOST environment variables, the current
// DateTime.Now.Ticks value and a counter that is seeded randomly on
// first use and incremented on every call.
// These strings should never be exposed to the user.
static public string GetUniqueId ()
{
	lock (uidLock) {
		// Seed lazily with a random starting point.
		if (uid == 0)
			uid = new Random ().Next ();

		uid = uid + 1;

		object [] parts = new object [] {
			Environment.GetEnvironmentVariable ("USER"),
			Environment.GetEnvironmentVariable ("HOST"),
			DateTime.Now.Ticks,
			uid
		};

		return string.Format ("{0}-{1}-{2}-{3}", parts);
	}
}
// Replacement text used by EscapeStringForHtml, indexed in this order:
// '&', '<', '>', '"', '\'', carriage return, line feed.
static string [] replacements = new string [] {
	"&amp;", "&lt;", "&gt;", "&quot;", "&apos;",
	"&#xD;", "&#xA;"};

// The quote character assumed to delimit attribute values: only this
// kind of quote is escaped by EscapeStringForHtml.
static private char QuoteChar = '\"';

// Returns true when ch is a code point that is not allowed in XML text:
// control characters other than tab, line feed and carriage return,
// the surrogate range D800-DFFF, FFFE, FFFF, and anything outside
// 0..10FFFF (including negative values).
private static bool IsInvalid (int ch)
{
	switch (ch) {
	case 9:
	case 10:
	case 13:
		return false;
	}

	if (ch < 32)
		return true;
	if (ch < 0xD800)
		return false;
	if (ch < 0xE000)
		return true;
	if (ch < 0xFFFE)
		return false;
	if (ch < 0x10000)
		return true;
	if (ch < 0x110000)
		return false;
	else
		return true;
}

// Escapes a string for inclusion in markup.
//   '&', '<' and '>' are always replaced by their entities.
//   '"', carriage return and line feed are replaced by "&quot;",
//   "&#xD;" and "&#xA;" unless skipQuotations is true.
//   '\'' is left alone while QuoteChar is the double quote.
//   Any other character rejected by IsInvalid is written as a
//   hexadecimal character reference (two digits below 255, at least
//   four otherwise).
// A high surrogate immediately followed by a low surrogate encodes one
// valid supplementary character and is copied through untouched; only
// unpaired surrogates are treated as invalid.
// When nothing needs escaping the original string instance is returned.
// The builder is local to the call, so concurrent callers cannot
// corrupt each other's output.
static public string EscapeStringForHtml (string source, bool skipQuotations)
{
	StringBuilder builder = null;  // created on the first replacement
	int start = 0;                 // first character not yet copied
	int count = source.Length;

	for (int i = 0; i < count; i++) {
		char c = source [i];
		string escaped;

		switch (c) {
		case '&': escaped = replacements [0]; break;
		case '<': escaped = replacements [1]; break;
		case '>': escaped = replacements [2]; break;
		case '\"':
			if (skipQuotations || QuoteChar == '\'')
				continue;
			escaped = replacements [3];
			break;
		case '\'':
			if (skipQuotations || QuoteChar == '\"')
				continue;
			escaped = replacements [4];
			break;
		case '\r':
			if (skipQuotations)
				continue;
			escaped = replacements [5];
			break;
		case '\n':
			if (skipQuotations)
				continue;
			escaped = replacements [6];
			break;
		default:
			// Well-formed surrogate pair: skip both halves.
			if (c >= '\uD800' && c <= '\uDBFF' && i + 1 < count
			    && source [i + 1] >= '\uDC00' && source [i + 1] <= '\uDFFF') {
				i++;
				continue;
			}

			if (! IsInvalid (c))
				continue;

			escaped = "&#x"
				+ ((int) c).ToString (c < (char) 255 ? "X02" : "X04", CultureInfo.InvariantCulture)
				+ ";";
			break;
		}

		if (builder == null)
			builder = new StringBuilder (count + 16);

		// Copy the clean stretch in front of c, then its replacement.
		builder.Append (source, start, i - start);
		builder.Append (escaped);
		start = i + 1;
	}

	if (builder == null)
		return source;

	builder.Append (source, start, count - start);
	return builder.ToString ();
}
// Returns str with every character rejected by IsInvalid replaced by a
// space, so the length of the string never changes.
// A high surrogate immediately followed by a low surrogate encodes one
// valid supplementary character and is kept; only unpaired surrogates
// are blanked out.
// Null gives null, and a string without invalid characters is returned
// as the same instance (no copy is made).
static public string CleanupInvalidXmlCharacters (string str)
{
	if (str == null)
		return null;

	int len = str.Length;
	char [] cleaned = null;  // copy of str, made on the first invalid character

	for (int i = 0; i < len; ++i) {
		char c = str [i];

		// Well-formed surrogate pair: keep both halves.
		if (c >= '\uD800' && c <= '\uDBFF' && i + 1 < len
		    && str [i + 1] >= '\uDC00' && str [i + 1] <= '\uDFFF') {
			++i;
			continue;
		}

		if (! IsInvalid (c))
			continue;

		if (cleaned == null)
			cleaned = str.ToCharArray ();
		cleaned [i] = ' ';
	}

	if (cleaned == null)
		return str;

	return new string (cleaned);
}
// Counts the white space separated words in str.
// When max_words is greater than zero, counting stops as soon as that
// many words have been seen, so the result never exceeds max_words.
// A null string contains zero words.
static public int CountWords (string str, int max_words)
{
	if (str == null)
		return 0;

	int words = 0;
	bool in_word = false;

	foreach (char c in str) {
		if (Char.IsWhiteSpace (c)) {
			in_word = false;
			continue;
		}

		// Only the first character of a word is counted.
		if (in_word)
			continue;

		in_word = true;
		++words;

		if (max_words > 0 && words >= max_words)
			break;
	}

	return words;
}

// Counts all the white space separated words in str, without a limit.
static public int CountWords (string str)
{
	return CountWords (str, -1);
}
// Normalizes the slashes at both ends of a path: a run of two or more
// leading slashes is reduced to one, and all trailing slashes are
// removed, except that the root path "/" is returned as is.
static public string SanitizePath (string path)
{
	if (path.StartsWith ("//")) {
		// Find the first character after the run of slashes ...
		int first_non_slash = 2;
		while (first_non_slash < path.Length && path [first_non_slash] == '/')
			++first_non_slash;

		// ... and keep exactly one slash in front of it.
		path = path.Substring (first_non_slash - 1);
	}

	if (path.Length == 1 && path [0] == '/')
		return path;

	return path.TrimEnd ('/');
}