utf8: add unit test for g_utf8_make_valid
[glib.git] / glib / gstrfuncs.c
blobeb0f1deff85f07372f6d750cfc461695bca92fbb
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
26 * MT safe
29 #include "config.h"
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <locale.h>
35 #include <string.h>
36 #include <locale.h>
37 #include <errno.h>
38 #include <ctype.h> /* For tolower() */
40 #ifdef HAVE_XLOCALE_H
41 /* Needed on BSD/OS X for e.g. strtod_l */
42 #include <xlocale.h>
43 #endif
45 #ifdef G_OS_WIN32
46 #include <windows.h>
47 #endif
49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
51 #include "gstrfuncs.h"
53 #include "gprintf.h"
54 #include "gprintfint.h"
55 #include "glibintl.h"
58 /**
59 * SECTION:string_utils
60 * @title: String Utility Functions
61 * @short_description: various string-related functions
63 * This section describes a number of utility functions for creating,
64 * duplicating, and manipulating strings.
66 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
67 * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf()
68 * are declared in the header `gprintf.h` which is not included in `glib.h`
69 * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
70 * explicitly include `<glib/gprintf.h>` in order to use the GLib
71 * printf() functions.
73 * ## String precision pitfalls # {#string-precision}
75 * While you may use the printf() functions to format UTF-8 strings,
76 * notice that the precision of a \%Ns parameter is interpreted
77 * as the number of bytes, not characters to print. On top of that,
78 * the GNU libc implementation of the printf() functions has the
79 * "feature" that it checks that the string given for the \%Ns
80 * parameter consists of a whole number of characters in the current
81 * encoding. So, unless you are sure you are always going to be in a
82 * UTF-8 locale or you know your text is restricted to ASCII, avoid
83 * using \%Ns. If your intention is to format strings for a
84 * certain number of columns, then \%Ns is not a correct solution
85 * anyway, since it fails to take wide characters (see g_unichar_iswide())
86 * into account.
88 * Note also that there are various printf() parameters which are platform
89 * dependent. GLib provides platform independent macros for these parameters
90 * which should be used instead. A common example is %G_GUINT64_FORMAT, which
91 * should be used instead of `%llu` or similar parameters for formatting
92 * 64-bit integers. These macros are all named `G_*_FORMAT`; see
93 * [Basic Types][glib-Basic-Types].
96 /**
97 * g_ascii_isalnum:
98 * @c: any character
100 * Determines whether a character is alphanumeric.
102 * Unlike the standard C library isalnum() function, this only
103 * recognizes standard ASCII letters and ignores the locale,
104 * returning %FALSE for all non-ASCII characters. Also, unlike
105 * the standard library function, this takes a char, not an int,
106 * so don't call it on %EOF, but no need to cast to #guchar before
107 * passing a possibly non-ASCII character in.
109 * Returns: %TRUE if @c is an ASCII alphanumeric character
113 * g_ascii_isalpha:
114 * @c: any character
116 * Determines whether a character is alphabetic (i.e. a letter).
118 * Unlike the standard C library isalpha() function, this only
119 * recognizes standard ASCII letters and ignores the locale,
120 * returning %FALSE for all non-ASCII characters. Also, unlike
121 * the standard library function, this takes a char, not an int,
122 * so don't call it on %EOF, but no need to cast to #guchar before
123 * passing a possibly non-ASCII character in.
125 * Returns: %TRUE if @c is an ASCII alphabetic character
129 * g_ascii_iscntrl:
130 * @c: any character
132 * Determines whether a character is a control character.
134 * Unlike the standard C library iscntrl() function, this only
135 * recognizes standard ASCII control characters and ignores the
136 * locale, returning %FALSE for all non-ASCII characters. Also,
137 * unlike the standard library function, this takes a char, not
138 * an int, so don't call it on %EOF, but no need to cast to #guchar
139 * before passing a possibly non-ASCII character in.
141 * Returns: %TRUE if @c is an ASCII control character.
145 * g_ascii_isdigit:
146 * @c: any character
148 * Determines whether a character is a digit (0-9).
150 * Unlike the standard C library isdigit() function, this takes
151 * a char, not an int, so don't call it on %EOF, but no need to
152 * cast to #guchar before passing a possibly non-ASCII character in.
154 * Returns: %TRUE if @c is an ASCII digit.
158 * g_ascii_isgraph:
159 * @c: any character
161 * Determines whether a character is a printing character and not a space.
163 * Unlike the standard C library isgraph() function, this only
164 * recognizes standard ASCII characters and ignores the locale,
165 * returning %FALSE for all non-ASCII characters. Also, unlike
166 * the standard library function, this takes a char, not an int,
167 * so don't call it on %EOF, but no need to cast to #guchar before
168 * passing a possibly non-ASCII character in.
170 * Returns: %TRUE if @c is an ASCII printing character other than space.
174 * g_ascii_islower:
175 * @c: any character
177 * Determines whether a character is an ASCII lower case letter.
179 * Unlike the standard C library islower() function, this only
180 * recognizes standard ASCII letters and ignores the locale,
181 * returning %FALSE for all non-ASCII characters. Also, unlike
182 * the standard library function, this takes a char, not an int,
183 * so don't call it on %EOF, but no need to worry about casting
184 * to #guchar before passing a possibly non-ASCII character in.
186 * Returns: %TRUE if @c is an ASCII lower case letter
190 * g_ascii_isprint:
191 * @c: any character
193 * Determines whether a character is a printing character.
195 * Unlike the standard C library isprint() function, this only
196 * recognizes standard ASCII characters and ignores the locale,
197 * returning %FALSE for all non-ASCII characters. Also, unlike
198 * the standard library function, this takes a char, not an int,
199 * so don't call it on %EOF, but no need to cast to #guchar before
200 * passing a possibly non-ASCII character in.
202 * Returns: %TRUE if @c is an ASCII printing character.
206 * g_ascii_ispunct:
207 * @c: any character
209 * Determines whether a character is a punctuation character.
211 * Unlike the standard C library ispunct() function, this only
212 * recognizes standard ASCII punctuation and ignores the locale,
213 * returning %FALSE for all non-ASCII characters. Also, unlike
214 * the standard library function, this takes a char, not an int,
215 * so don't call it on %EOF, but no need to cast to #guchar before
216 * passing a possibly non-ASCII character in.
218 * Returns: %TRUE if @c is an ASCII punctuation character.
222 * g_ascii_isspace:
223 * @c: any character
225 * Determines whether a character is a white-space character.
227 * Unlike the standard C library isspace() function, this only
228 * recognizes standard ASCII white-space and ignores the locale,
229 * returning %FALSE for all non-ASCII characters. Also, unlike
230 * the standard library function, this takes a char, not an int,
231 * so don't call it on %EOF, but no need to cast to #guchar before
232 * passing a possibly non-ASCII character in.
234 * Returns: %TRUE if @c is an ASCII white-space character
238 * g_ascii_isupper:
239 * @c: any character
241 * Determines whether a character is an ASCII upper case letter.
243 * Unlike the standard C library isupper() function, this only
244 * recognizes standard ASCII letters and ignores the locale,
245 * returning %FALSE for all non-ASCII characters. Also, unlike
246 * the standard library function, this takes a char, not an int,
247 * so don't call it on %EOF, but no need to worry about casting
248 * to #guchar before passing a possibly non-ASCII character in.
250 * Returns: %TRUE if @c is an ASCII upper case letter
254 * g_ascii_isxdigit:
255 * @c: any character
257 * Determines whether a character is a hexadecimal-digit character.
259 * Unlike the standard C library isxdigit() function, this takes
260 * a char, not an int, so don't call it on %EOF, but no need to
261 * cast to #guchar before passing a possibly non-ASCII character in.
263 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
267 * G_ASCII_DTOSTR_BUF_SIZE:
269 * A good size for a buffer to be passed into g_ascii_dtostr().
270 * It is guaranteed to be enough for all output of that function
271 * on systems with 64bit IEEE-compatible doubles.
273 * The typical usage would be something like:
274 * |[<!-- language="C" -->
275 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
277 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
278 * ]|
282 * g_strstrip:
283 * @string: a string to remove the leading and trailing whitespace from
285 * Removes leading and trailing whitespace from a string.
286 * See g_strchomp() and g_strchug().
288 * Returns: @string
292 * G_STR_DELIMITERS:
294 * The standard delimiters, used in g_strdelimit().
297 static const guint16 ascii_table_data[256] = {
298 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
299 0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
300 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
301 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
302 0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
303 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
304 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
305 0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
306 0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
307 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
308 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
309 0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
310 0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
311 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
312 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
313 0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
314 /* the upper 128 are all zeroes */
317 const guint16 * const g_ascii_table = ascii_table_data;
/* The locale_t based code paths are only used when every *_l() function
 * they need has been detected at configure time.
 */
#if defined (HAVE_NEWLOCALE) && \
    defined (HAVE_USELOCALE) && \
    defined (HAVE_STRTOD_L) && \
    defined (HAVE_STRTOULL_L) && \
    defined (HAVE_STRTOLL_L)
#define USE_XLOCALE 1
#endif

#ifdef USE_XLOCALE
/* Returns a locale_t handle for the "C" locale.
 *
 * The handle is created by newlocale() inside a
 * g_once_init_enter()/g_once_init_leave() pair and cached in a static
 * variable, so later calls return the cached handle.
 */
static locale_t
get_C_locale (void)
{
  static gsize init_guard = FALSE;
  static locale_t c_locale = NULL;

  if (!g_once_init_enter (&init_guard))
    return c_locale;

  c_locale = newlocale (LC_ALL_MASK, "C", NULL);
  g_once_init_leave (&init_guard, TRUE);

  return c_locale;
}
#endif
345 * g_strdup:
346 * @str: (nullable): the string to duplicate
348 * Duplicates a string. If @str is %NULL it returns %NULL.
349 * The returned string should be freed with g_free()
350 * when no longer needed.
352 * Returns: a newly-allocated copy of @str
354 gchar*
355 g_strdup (const gchar *str)
357 gchar *new_str;
358 gsize length;
360 if (str)
362 length = strlen (str) + 1;
363 new_str = g_new (char, length);
364 memcpy (new_str, str, length);
366 else
367 new_str = NULL;
369 return new_str;
373 * g_memdup:
374 * @mem: the memory to copy.
375 * @byte_size: the number of bytes to copy.
377 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
378 * from @mem. If @mem is %NULL it returns %NULL.
380 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
381 * is %NULL.
383 gpointer
384 g_memdup (gconstpointer mem,
385 guint byte_size)
387 gpointer new_mem;
389 if (mem && byte_size != 0)
391 new_mem = g_malloc (byte_size);
392 memcpy (new_mem, mem, byte_size);
394 else
395 new_mem = NULL;
397 return new_mem;
401 * g_strndup:
402 * @str: the string to duplicate
403 * @n: the maximum number of bytes to copy from @str
405 * Duplicates the first @n bytes of a string, returning a newly-allocated
406 * buffer @n + 1 bytes long which will always be nul-terminated. If @str
407 * is less than @n bytes long the buffer is padded with nuls. If @str is
408 * %NULL it returns %NULL. The returned value should be freed when no longer
409 * needed.
411 * To copy a number of characters from a UTF-8 encoded string,
412 * use g_utf8_strncpy() instead.
414 * Returns: a newly-allocated buffer containing the first @n bytes
415 * of @str, nul-terminated
417 gchar*
418 g_strndup (const gchar *str,
419 gsize n)
421 gchar *new_str;
423 if (str)
425 new_str = g_new (gchar, n + 1);
426 strncpy (new_str, str, n);
427 new_str[n] = '\0';
429 else
430 new_str = NULL;
432 return new_str;
436 * g_strnfill:
437 * @length: the length of the new string
438 * @fill_char: the byte to fill the string with
440 * Creates a new string @length bytes long filled with @fill_char.
441 * The returned string should be freed when no longer needed.
443 * Returns: a newly-allocated string filled the @fill_char
445 gchar*
446 g_strnfill (gsize length,
447 gchar fill_char)
449 gchar *str;
451 str = g_new (gchar, length + 1);
452 memset (str, (guchar)fill_char, length);
453 str[length] = '\0';
455 return str;
459 * g_stpcpy:
460 * @dest: destination buffer.
461 * @src: source string.
463 * Copies a nul-terminated string into the dest buffer, include the
464 * trailing nul, and return a pointer to the trailing nul byte.
465 * This is useful for concatenating multiple strings together
466 * without having to repeatedly scan for the end.
468 * Returns: a pointer to trailing nul byte.
470 gchar *
471 g_stpcpy (gchar *dest,
472 const gchar *src)
474 #ifdef HAVE_STPCPY
475 g_return_val_if_fail (dest != NULL, NULL);
476 g_return_val_if_fail (src != NULL, NULL);
477 return stpcpy (dest, src);
478 #else
479 gchar *d = dest;
480 const gchar *s = src;
482 g_return_val_if_fail (dest != NULL, NULL);
483 g_return_val_if_fail (src != NULL, NULL);
485 *d++ = *s;
486 while (*s++ != '\0');
488 return d - 1;
489 #endif
493 * g_strdup_vprintf:
494 * @format: a standard printf() format string, but notice
495 * [string precision pitfalls][string-precision]
496 * @args: the list of parameters to insert into the format string
498 * Similar to the standard C vsprintf() function but safer, since it
499 * calculates the maximum space required and allocates memory to hold
500 * the result. The returned string should be freed with g_free() when
501 * no longer needed.
503 * See also g_vasprintf(), which offers the same functionality, but
504 * additionally returns the length of the allocated string.
506 * Returns: a newly-allocated string holding the result
508 gchar*
509 g_strdup_vprintf (const gchar *format,
510 va_list args)
512 gchar *string = NULL;
514 g_vasprintf (&string, format, args);
516 return string;
520 * g_strdup_printf:
521 * @format: a standard printf() format string, but notice
522 * [string precision pitfalls][string-precision]
523 * @...: the parameters to insert into the format string
525 * Similar to the standard C sprintf() function but safer, since it
526 * calculates the maximum space required and allocates memory to hold
527 * the result. The returned string should be freed with g_free() when no
528 * longer needed.
530 * Returns: a newly-allocated string holding the result
532 gchar*
533 g_strdup_printf (const gchar *format,
534 ...)
536 gchar *buffer;
537 va_list args;
539 va_start (args, format);
540 buffer = g_strdup_vprintf (format, args);
541 va_end (args);
543 return buffer;
547 * g_strconcat:
548 * @string1: the first string to add, which must not be %NULL
549 * @...: a %NULL-terminated list of strings to append to the string
551 * Concatenates all of the given strings into one long string. The
552 * returned string should be freed with g_free() when no longer needed.
554 * The variable argument list must end with %NULL. If you forget the %NULL,
555 * g_strconcat() will start appending random memory junk to your string.
557 * Note that this function is usually not the right function to use to
558 * assemble a translated message from pieces, since proper translation
559 * often requires the pieces to be reordered.
561 * Returns: a newly-allocated string containing all the string arguments
563 gchar*
564 g_strconcat (const gchar *string1, ...)
566 gsize l;
567 va_list args;
568 gchar *s;
569 gchar *concat;
570 gchar *ptr;
572 if (!string1)
573 return NULL;
575 l = 1 + strlen (string1);
576 va_start (args, string1);
577 s = va_arg (args, gchar*);
578 while (s)
580 l += strlen (s);
581 s = va_arg (args, gchar*);
583 va_end (args);
585 concat = g_new (gchar, l);
586 ptr = concat;
588 ptr = g_stpcpy (ptr, string1);
589 va_start (args, string1);
590 s = va_arg (args, gchar*);
591 while (s)
593 ptr = g_stpcpy (ptr, s);
594 s = va_arg (args, gchar*);
596 va_end (args);
598 return concat;
602 * g_strtod:
603 * @nptr: the string to convert to a numeric value.
604 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
605 * character after the last character used in the conversion.
607 * Converts a string to a #gdouble value.
608 * It calls the standard strtod() function to handle the conversion, but
609 * if the string is not completely converted it attempts the conversion
610 * again with g_ascii_strtod(), and returns the best match.
612 * This function should seldom be used. The normal situation when reading
613 * numbers not for human consumption is to use g_ascii_strtod(). Only when
614 * you know that you must expect both locale formatted and C formatted numbers
615 * should you use this. Make sure that you don't pass strings such as comma
616 * separated lists of values, since the commas may be interpreted as a decimal
617 * point in some locales, causing unexpected results.
619 * Returns: the #gdouble value.
621 gdouble
622 g_strtod (const gchar *nptr,
623 gchar **endptr)
625 gchar *fail_pos_1;
626 gchar *fail_pos_2;
627 gdouble val_1;
628 gdouble val_2 = 0;
630 g_return_val_if_fail (nptr != NULL, 0);
632 fail_pos_1 = NULL;
633 fail_pos_2 = NULL;
635 val_1 = strtod (nptr, &fail_pos_1);
637 if (fail_pos_1 && fail_pos_1[0] != 0)
638 val_2 = g_ascii_strtod (nptr, &fail_pos_2);
640 if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
642 if (endptr)
643 *endptr = fail_pos_1;
644 return val_1;
646 else
648 if (endptr)
649 *endptr = fail_pos_2;
650 return val_2;
655 * g_ascii_strtod:
656 * @nptr: the string to convert to a numeric value.
657 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
658 * character after the last character used in the conversion.
660 * Converts a string to a #gdouble value.
662 * This function behaves like the standard strtod() function
663 * does in the C locale. It does this without actually changing
664 * the current locale, since that would not be thread-safe.
665 * A limitation of the implementation is that this function
666 * will still accept localized versions of infinities and NANs.
668 * This function is typically used when reading configuration
669 * files or other non-user input that should be locale independent.
670 * To handle input from the user you should normally use the
671 * locale-sensitive system strtod() function.
673 * To convert from a #gdouble to a string in a locale-insensitive
674 * way, use g_ascii_dtostr().
676 * If the correct value would cause overflow, plus or minus %HUGE_VAL
677 * is returned (according to the sign of the value), and %ERANGE is
678 * stored in %errno. If the correct value would cause underflow,
679 * zero is returned and %ERANGE is stored in %errno.
681 * This function resets %errno before calling strtod() so that
682 * you can reliably detect overflow and underflow.
684 * Returns: the #gdouble value.
686 gdouble
687 g_ascii_strtod (const gchar *nptr,
688 gchar **endptr)
690 #ifdef USE_XLOCALE
692 g_return_val_if_fail (nptr != NULL, 0);
694 errno = 0;
696 return strtod_l (nptr, endptr, get_C_locale ());
698 #else
700 gchar *fail_pos;
701 gdouble val;
702 #ifndef __BIONIC__
703 struct lconv *locale_data;
704 #endif
705 const char *decimal_point;
706 int decimal_point_len;
707 const char *p, *decimal_point_pos;
708 const char *end = NULL; /* Silence gcc */
709 int strtod_errno;
711 g_return_val_if_fail (nptr != NULL, 0);
713 fail_pos = NULL;
715 #ifndef __BIONIC__
716 locale_data = localeconv ();
717 decimal_point = locale_data->decimal_point;
718 decimal_point_len = strlen (decimal_point);
719 #else
720 decimal_point = ".";
721 decimal_point_len = 1;
722 #endif
724 g_assert (decimal_point_len != 0);
726 decimal_point_pos = NULL;
727 end = NULL;
729 if (decimal_point[0] != '.' ||
730 decimal_point[1] != 0)
732 p = nptr;
733 /* Skip leading space */
734 while (g_ascii_isspace (*p))
735 p++;
737 /* Skip leading optional sign */
738 if (*p == '+' || *p == '-')
739 p++;
741 if (p[0] == '0' &&
742 (p[1] == 'x' || p[1] == 'X'))
744 p += 2;
745 /* HEX - find the (optional) decimal point */
747 while (g_ascii_isxdigit (*p))
748 p++;
750 if (*p == '.')
751 decimal_point_pos = p++;
753 while (g_ascii_isxdigit (*p))
754 p++;
756 if (*p == 'p' || *p == 'P')
757 p++;
758 if (*p == '+' || *p == '-')
759 p++;
760 while (g_ascii_isdigit (*p))
761 p++;
763 end = p;
765 else if (g_ascii_isdigit (*p) || *p == '.')
767 while (g_ascii_isdigit (*p))
768 p++;
770 if (*p == '.')
771 decimal_point_pos = p++;
773 while (g_ascii_isdigit (*p))
774 p++;
776 if (*p == 'e' || *p == 'E')
777 p++;
778 if (*p == '+' || *p == '-')
779 p++;
780 while (g_ascii_isdigit (*p))
781 p++;
783 end = p;
785 /* For the other cases, we need not convert the decimal point */
788 if (decimal_point_pos)
790 char *copy, *c;
792 /* We need to convert the '.' to the locale specific decimal point */
793 copy = g_malloc (end - nptr + 1 + decimal_point_len);
795 c = copy;
796 memcpy (c, nptr, decimal_point_pos - nptr);
797 c += decimal_point_pos - nptr;
798 memcpy (c, decimal_point, decimal_point_len);
799 c += decimal_point_len;
800 memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
801 c += end - (decimal_point_pos + 1);
802 *c = 0;
804 errno = 0;
805 val = strtod (copy, &fail_pos);
806 strtod_errno = errno;
808 if (fail_pos)
810 if (fail_pos - copy > decimal_point_pos - nptr)
811 fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
812 else
813 fail_pos = (char *)nptr + (fail_pos - copy);
816 g_free (copy);
819 else if (end)
821 char *copy;
823 copy = g_malloc (end - (char *)nptr + 1);
824 memcpy (copy, nptr, end - nptr);
825 *(copy + (end - (char *)nptr)) = 0;
827 errno = 0;
828 val = strtod (copy, &fail_pos);
829 strtod_errno = errno;
831 if (fail_pos)
833 fail_pos = (char *)nptr + (fail_pos - copy);
836 g_free (copy);
838 else
840 errno = 0;
841 val = strtod (nptr, &fail_pos);
842 strtod_errno = errno;
845 if (endptr)
846 *endptr = fail_pos;
848 errno = strtod_errno;
850 return val;
851 #endif
856 * g_ascii_dtostr:
857 * @buffer: A buffer to place the resulting string in
858 * @buf_len: The length of the buffer.
859 * @d: The #gdouble to convert
861 * Converts a #gdouble to a string, using the '.' as
862 * decimal point.
864 * This function generates enough precision that converting
865 * the string back using g_ascii_strtod() gives the same machine-number
866 * (on machines with IEEE compatible 64bit doubles). It is
867 * guaranteed that the size of the resulting string will never
868 * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
869 * nul character, which is always added.
871 * Returns: The pointer to the buffer with the converted string.
873 gchar *
874 g_ascii_dtostr (gchar *buffer,
875 gint buf_len,
876 gdouble d)
878 return g_ascii_formatd (buffer, buf_len, "%.17g", d);
881 #pragma GCC diagnostic push
882 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
885 * g_ascii_formatd:
886 * @buffer: A buffer to place the resulting string in
887 * @buf_len: The length of the buffer.
888 * @format: The printf()-style format to use for the
889 * code to use for converting.
890 * @d: The #gdouble to convert
892 * Converts a #gdouble to a string, using the '.' as
893 * decimal point. To format the number you pass in
894 * a printf()-style format string. Allowed conversion
895 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
897 * The returned buffer is guaranteed to be nul-terminated.
899 * If you just want to want to serialize the value into a
900 * string, use g_ascii_dtostr().
902 * Returns: The pointer to the buffer with the converted string.
904 gchar *
905 g_ascii_formatd (gchar *buffer,
906 gint buf_len,
907 const gchar *format,
908 gdouble d)
910 #ifdef USE_XLOCALE
911 locale_t old_locale;
913 old_locale = uselocale (get_C_locale ());
914 _g_snprintf (buffer, buf_len, format, d);
915 uselocale (old_locale);
917 return buffer;
918 #else
919 #ifndef __BIONIC__
920 struct lconv *locale_data;
921 #endif
922 const char *decimal_point;
923 int decimal_point_len;
924 gchar *p;
925 int rest_len;
926 gchar format_char;
928 g_return_val_if_fail (buffer != NULL, NULL);
929 g_return_val_if_fail (format[0] == '%', NULL);
930 g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
932 format_char = format[strlen (format) - 1];
934 g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
935 format_char == 'f' || format_char == 'F' ||
936 format_char == 'g' || format_char == 'G',
937 NULL);
939 if (format[0] != '%')
940 return NULL;
942 if (strpbrk (format + 1, "'l%"))
943 return NULL;
945 if (!(format_char == 'e' || format_char == 'E' ||
946 format_char == 'f' || format_char == 'F' ||
947 format_char == 'g' || format_char == 'G'))
948 return NULL;
950 _g_snprintf (buffer, buf_len, format, d);
952 #ifndef __BIONIC__
953 locale_data = localeconv ();
954 decimal_point = locale_data->decimal_point;
955 decimal_point_len = strlen (decimal_point);
956 #else
957 decimal_point = ".";
958 decimal_point_len = 1;
959 #endif
961 g_assert (decimal_point_len != 0);
963 if (decimal_point[0] != '.' ||
964 decimal_point[1] != 0)
966 p = buffer;
968 while (g_ascii_isspace (*p))
969 p++;
971 if (*p == '+' || *p == '-')
972 p++;
974 while (isdigit ((guchar)*p))
975 p++;
977 if (strncmp (p, decimal_point, decimal_point_len) == 0)
979 *p = '.';
980 p++;
981 if (decimal_point_len > 1)
983 rest_len = strlen (p + (decimal_point_len-1));
984 memmove (p, p + (decimal_point_len-1), rest_len);
985 p[rest_len] = 0;
990 return buffer;
991 #endif
993 #pragma GCC diagnostic pop
/* Locale-independent, ASCII-only character helpers.
 * Note that each macro may evaluate its argument more than once.
 */
#define ISSPACE(c)              ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                                 (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISUPPER(c)              ('A' <= (c) && (c) <= 'Z')
#define ISLOWER(c)              ('a' <= (c) && (c) <= 'z')
#define ISALPHA(c)              (ISUPPER (c) || ISLOWER (c))
#define TOUPPER(c)              (ISLOWER (c) ? (c) - ('a' - 'A') : (c))
#define TOLOWER(c)              (ISUPPER (c) ? (c) + ('a' - 'A') : (c))
1003 #ifndef USE_XLOCALE
1005 static guint64
1006 g_parse_long_long (const gchar *nptr,
1007 const gchar **endptr,
1008 guint base,
1009 gboolean *negative)
1011 /* this code is based on on the strtol(3) code from GNU libc released under
1012 * the GNU Lesser General Public License.
1014 * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1015 * Free Software Foundation, Inc.
1017 gboolean overflow;
1018 guint64 cutoff;
1019 guint64 cutlim;
1020 guint64 ui64;
1021 const gchar *s, *save;
1022 guchar c;
1024 g_return_val_if_fail (nptr != NULL, 0);
1026 *negative = FALSE;
1027 if (base == 1 || base > 36)
1029 errno = EINVAL;
1030 if (endptr)
1031 *endptr = nptr;
1032 return 0;
1035 save = s = nptr;
1037 /* Skip white space. */
1038 while (ISSPACE (*s))
1039 ++s;
1041 if (G_UNLIKELY (!*s))
1042 goto noconv;
1044 /* Check for a sign. */
1045 if (*s == '-')
1047 *negative = TRUE;
1048 ++s;
1050 else if (*s == '+')
1051 ++s;
1053 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
1054 if (*s == '0')
1056 if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1058 s += 2;
1059 base = 16;
1061 else if (base == 0)
1062 base = 8;
1064 else if (base == 0)
1065 base = 10;
1067 /* Save the pointer so we can check later if anything happened. */
1068 save = s;
1069 cutoff = G_MAXUINT64 / base;
1070 cutlim = G_MAXUINT64 % base;
1072 overflow = FALSE;
1073 ui64 = 0;
1074 c = *s;
1075 for (; c; c = *++s)
1077 if (c >= '0' && c <= '9')
1078 c -= '0';
1079 else if (ISALPHA (c))
1080 c = TOUPPER (c) - 'A' + 10;
1081 else
1082 break;
1083 if (c >= base)
1084 break;
1085 /* Check for overflow. */
1086 if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1087 overflow = TRUE;
1088 else
1090 ui64 *= base;
1091 ui64 += c;
1095 /* Check if anything actually happened. */
1096 if (s == save)
1097 goto noconv;
1099 /* Store in ENDPTR the address of one character
1100 past the last character we converted. */
1101 if (endptr)
1102 *endptr = s;
1104 if (G_UNLIKELY (overflow))
1106 errno = ERANGE;
1107 return G_MAXUINT64;
1110 return ui64;
1112 noconv:
1113 /* We must handle a special case here: the base is 0 or 16 and the
1114 first two characters are '0' and 'x', but the rest are no
1115 hexadecimal digits. This is no error case. We return 0 and
1116 ENDPTR points to the `x`. */
1117 if (endptr)
1119 if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1120 && save[-2] == '0')
1121 *endptr = &save[-1];
1122 else
1123 /* There was no number to convert. */
1124 *endptr = nptr;
1126 return 0;
1128 #endif /* !USE_XLOCALE */
1131 * g_ascii_strtoull:
1132 * @nptr: the string to convert to a numeric value.
1133 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1134 * character after the last character used in the conversion.
1135 * @base: to be used for the conversion, 2..36 or 0
1137 * Converts a string to a #guint64 value.
1138 * This function behaves like the standard strtoull() function
1139 * does in the C locale. It does this without actually
1140 * changing the current locale, since that would not be
1141 * thread-safe.
1143 * This function is typically used when reading configuration
1144 * files or other non-user input that should be locale independent.
1145 * To handle input from the user you should normally use the
1146 * locale-sensitive system strtoull() function.
1148 * If the correct value would cause overflow, %G_MAXUINT64
1149 * is returned, and `ERANGE` is stored in `errno`.
1150 * If the base is outside the valid range, zero is returned, and
1151 * `EINVAL` is stored in `errno`.
1152 * If the string conversion fails, zero is returned, and @endptr returns
1153 * @nptr (if @endptr is non-%NULL).
1155 * Returns: the #guint64 value or zero on error.
1157 * Since: 2.2
1159 guint64
1160 g_ascii_strtoull (const gchar *nptr,
1161 gchar **endptr,
1162 guint base)
1164 #ifdef USE_XLOCALE
1165 return strtoull_l (nptr, endptr, base, get_C_locale ());
1166 #else
1167 gboolean negative;
1168 guint64 result;
1170 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1172 /* Return the result of the appropriate sign. */
1173 return negative ? -result : result;
1174 #endif
1178 * g_ascii_strtoll:
1179 * @nptr: the string to convert to a numeric value.
1180 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1181 * character after the last character used in the conversion.
1182 * @base: to be used for the conversion, 2..36 or 0
1184 * Converts a string to a #gint64 value.
1185 * This function behaves like the standard strtoll() function
1186 * does in the C locale. It does this without actually
1187 * changing the current locale, since that would not be
1188 * thread-safe.
1190 * This function is typically used when reading configuration
1191 * files or other non-user input that should be locale independent.
1192 * To handle input from the user you should normally use the
1193 * locale-sensitive system strtoll() function.
1195 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1196 * is returned, and `ERANGE` is stored in `errno`.
1197 * If the base is outside the valid range, zero is returned, and
1198 * `EINVAL` is stored in `errno`. If the
1199 * string conversion fails, zero is returned, and @endptr returns @nptr
1200 * (if @endptr is non-%NULL).
1202 * Returns: the #gint64 value or zero on error.
1204 * Since: 2.12
1206 gint64
1207 g_ascii_strtoll (const gchar *nptr,
1208 gchar **endptr,
1209 guint base)
1211 #ifdef USE_XLOCALE
1212 return strtoll_l (nptr, endptr, base, get_C_locale ());
1213 #else
1214 gboolean negative;
1215 guint64 result;
1217 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1219 if (negative && result > (guint64) G_MININT64)
1221 errno = ERANGE;
1222 return G_MININT64;
1224 else if (!negative && result > (guint64) G_MAXINT64)
1226 errno = ERANGE;
1227 return G_MAXINT64;
1229 else if (negative)
1230 return - (gint64) result;
1231 else
1232 return (gint64) result;
1233 #endif
1237 * g_strerror:
1238 * @errnum: the system error number. See the standard C %errno
1239 * documentation
1241 * Returns a string corresponding to the given error code, e.g. "no
1242 * such process". Unlike strerror(), this always returns a string in
1243 * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1244 * the lifetime of the process.
1246 * Note that the string may be translated according to the current locale.
1248 * The value of %errno will not be changed by this function.
1250 * Returns: a UTF-8 string describing the error code. If the error code
1251 * is unknown, it returns a string like "unknown error (<code>)".
1253 const gchar *
1254 g_strerror (gint errnum)
1256 static GHashTable *errors;
1257 G_LOCK_DEFINE_STATIC (errors);
1258 const gchar *msg;
1259 gint saved_errno = errno;
1261 G_LOCK (errors);
1262 if (errors)
1263 msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1264 else
1266 errors = g_hash_table_new (NULL, NULL);
1267 msg = NULL;
1270 if (!msg)
1272 gchar buf[1024];
1273 GError *error = NULL;
1275 #if defined(G_OS_WIN32)
1276 strerror_s (buf, sizeof (buf), errnum);
1277 msg = buf;
1278 #elif defined(HAVE_STRERROR_R)
1279 /* Match the condition in strerror_r(3) for glibc */
1280 # if defined(__GLIBC__) && !((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && ! _GNU_SOURCE)
1281 msg = strerror_r (errnum, buf, sizeof (buf));
1282 # else
1283 strerror_r (errnum, buf, sizeof (buf));
1284 msg = buf;
1285 # endif /* HAVE_STRERROR_R */
1286 #else
1287 g_strlcpy (buf, strerror (errnum), sizeof (buf));
1288 msg = buf;
1289 #endif
1290 if (!g_get_charset (NULL))
1292 msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1293 if (error)
1294 g_print ("%s\n", error->message);
1296 else if (msg == (const gchar *)buf)
1297 msg = g_strdup (buf);
1299 g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1301 G_UNLOCK (errors);
1303 errno = saved_errno;
1304 return msg;
1308 * g_strsignal:
1309 * @signum: the signal number. See the `signal` documentation
1311 * Returns a string describing the given signal, e.g. "Segmentation fault".
1312 * You should use this function in preference to strsignal(), because it
1313 * returns a string in UTF-8 encoding, and since not all platforms support
1314 * the strsignal() function.
1316 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1317 * it returns "unknown signal (<signum>)".
1319 const gchar *
1320 g_strsignal (gint signum)
1322 gchar *msg;
1323 gchar *tofree;
1324 const gchar *ret;
1326 msg = tofree = NULL;
1328 #ifdef HAVE_STRSIGNAL
1329 msg = strsignal (signum);
1330 if (!g_get_charset (NULL))
1331 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1332 #endif
1334 if (!msg)
1335 msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1336 ret = g_intern_string (msg);
1337 g_free (tofree);
1339 return ret;
/* Functions g_strlcpy and g_strlcat were originally developed by
 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
 * for more information.
 */
1348 #ifdef HAVE_STRLCPY
1349 /* Use the native ones, if available; they might be implemented in assembly */
1350 gsize
1351 g_strlcpy (gchar *dest,
1352 const gchar *src,
1353 gsize dest_size)
1355 g_return_val_if_fail (dest != NULL, 0);
1356 g_return_val_if_fail (src != NULL, 0);
1358 return strlcpy (dest, src, dest_size);
1361 gsize
1362 g_strlcat (gchar *dest,
1363 const gchar *src,
1364 gsize dest_size)
1366 g_return_val_if_fail (dest != NULL, 0);
1367 g_return_val_if_fail (src != NULL, 0);
1369 return strlcat (dest, src, dest_size);
1372 #else /* ! HAVE_STRLCPY */
1374 * g_strlcpy:
1375 * @dest: destination buffer
1376 * @src: source buffer
1377 * @dest_size: length of @dest in bytes
1379 * Portability wrapper that calls strlcpy() on systems which have it,
1380 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1381 * guaranteed to be nul-terminated; @src must be nul-terminated;
1382 * @dest_size is the buffer size, not the number of bytes to copy.
1384 * At most @dest_size - 1 characters will be copied. Always nul-terminates
1385 * (unless @dest_size is 0). This function does not allocate memory. Unlike
1386 * strncpy(), this function doesn't pad @dest (so it's often faster). It
1387 * returns the size of the attempted result, strlen (src), so if
1388 * @retval >= @dest_size, truncation occurred.
1390 * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1391 * but if you really want to avoid screwups, g_strdup() is an even better
1392 * idea.
1394 * Returns: length of @src
1396 gsize
1397 g_strlcpy (gchar *dest,
1398 const gchar *src,
1399 gsize dest_size)
1401 gchar *d = dest;
1402 const gchar *s = src;
1403 gsize n = dest_size;
1405 g_return_val_if_fail (dest != NULL, 0);
1406 g_return_val_if_fail (src != NULL, 0);
1408 /* Copy as many bytes as will fit */
1409 if (n != 0 && --n != 0)
1412 gchar c = *s++;
1414 *d++ = c;
1415 if (c == 0)
1416 break;
1418 while (--n != 0);
1420 /* If not enough room in dest, add NUL and traverse rest of src */
1421 if (n == 0)
1423 if (dest_size != 0)
1424 *d = 0;
1425 while (*s++)
1429 return s - src - 1; /* count does not include NUL */
1433 * g_strlcat:
1434 * @dest: destination buffer, already containing one nul-terminated string
1435 * @src: source buffer
1436 * @dest_size: length of @dest buffer in bytes (not length of existing string
1437 * inside @dest)
1439 * Portability wrapper that calls strlcat() on systems which have it,
1440 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1441 * guaranteeing nul-termination for @dest. The total size of @dest won't
1442 * exceed @dest_size.
1444 * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1445 * @dest_size is the full size of dest, not the space left over. This
1446 * function does not allocate memory. It always nul-terminates (unless
1447 * @dest_size == 0 or there were no nul characters in the @dest_size
1448 * characters of dest to start with).
1450 * Caveat: this is supposedly a more secure alternative to strcat() or
1451 * strncat(), but for real security g_strconcat() is harder to mess up.
1453 * Returns: size of attempted result, which is MIN (dest_size, strlen
1454 * (original dest)) + strlen (src), so if retval >= dest_size,
1455 * truncation occurred.
1457 gsize
1458 g_strlcat (gchar *dest,
1459 const gchar *src,
1460 gsize dest_size)
1462 gchar *d = dest;
1463 const gchar *s = src;
1464 gsize bytes_left = dest_size;
1465 gsize dlength; /* Logically, MIN (strlen (d), dest_size) */
1467 g_return_val_if_fail (dest != NULL, 0);
1468 g_return_val_if_fail (src != NULL, 0);
1470 /* Find the end of dst and adjust bytes left but don't go past end */
1471 while (*d != 0 && bytes_left-- != 0)
1472 d++;
1473 dlength = d - dest;
1474 bytes_left = dest_size - dlength;
1476 if (bytes_left == 0)
1477 return dlength + strlen (s);
1479 while (*s != 0)
1481 if (bytes_left != 1)
1483 *d++ = *s;
1484 bytes_left--;
1486 s++;
1488 *d = 0;
1490 return dlength + (s - src); /* count does not include NUL */
1492 #endif /* ! HAVE_STRLCPY */
1495 * g_ascii_strdown:
1496 * @str: a string
1497 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1499 * Converts all upper case ASCII letters to lower case ASCII letters.
1501 * Returns: a newly-allocated string, with all the upper case
1502 * characters in @str converted to lower case, with semantics that
1503 * exactly match g_ascii_tolower(). (Note that this is unlike the
1504 * old g_strdown(), which modified the string in place.)
1506 gchar*
1507 g_ascii_strdown (const gchar *str,
1508 gssize len)
1510 gchar *result, *s;
1512 g_return_val_if_fail (str != NULL, NULL);
1514 if (len < 0)
1515 len = strlen (str);
1517 result = g_strndup (str, len);
1518 for (s = result; *s; s++)
1519 *s = g_ascii_tolower (*s);
1521 return result;
1525 * g_ascii_strup:
1526 * @str: a string
1527 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1529 * Converts all lower case ASCII letters to upper case ASCII letters.
1531 * Returns: a newly allocated string, with all the lower case
1532 * characters in @str converted to upper case, with semantics that
1533 * exactly match g_ascii_toupper(). (Note that this is unlike the
1534 * old g_strup(), which modified the string in place.)
1536 gchar*
1537 g_ascii_strup (const gchar *str,
1538 gssize len)
1540 gchar *result, *s;
1542 g_return_val_if_fail (str != NULL, NULL);
1544 if (len < 0)
1545 len = strlen (str);
1547 result = g_strndup (str, len);
1548 for (s = result; *s; s++)
1549 *s = g_ascii_toupper (*s);
1551 return result;
1555 * g_str_is_ascii:
1556 * @str: a string
1558 * Determines if a string is pure ASCII. A string is pure ASCII if it
1559 * contains no bytes with the high bit set.
1561 * Returns: %TRUE if @str is ASCII
1563 * Since: 2.40
1565 gboolean
1566 g_str_is_ascii (const gchar *str)
1568 gint i;
1570 for (i = 0; str[i]; i++)
1571 if (str[i] & 0x80)
1572 return FALSE;
1574 return TRUE;
1578 * g_strdown:
1579 * @string: the string to convert.
1581 * Converts a string to lower case.
1583 * Returns: the string
1585 * Deprecated:2.2: This function is totally broken for the reasons discussed
1586 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1587 * instead.
1589 gchar*
1590 g_strdown (gchar *string)
1592 guchar *s;
1594 g_return_val_if_fail (string != NULL, NULL);
1596 s = (guchar *) string;
1598 while (*s)
1600 if (isupper (*s))
1601 *s = tolower (*s);
1602 s++;
1605 return (gchar *) string;
1609 * g_strup:
1610 * @string: the string to convert
1612 * Converts a string to upper case.
1614 * Returns: the string
1616 * Deprecated:2.2: This function is totally broken for the reasons
1617 * discussed in the g_strncasecmp() docs - use g_ascii_strup()
1618 * or g_utf8_strup() instead.
1620 gchar*
1621 g_strup (gchar *string)
1623 guchar *s;
1625 g_return_val_if_fail (string != NULL, NULL);
1627 s = (guchar *) string;
1629 while (*s)
1631 if (islower (*s))
1632 *s = toupper (*s);
1633 s++;
1636 return (gchar *) string;
1640 * g_strreverse:
1641 * @string: the string to reverse
1643 * Reverses all of the bytes in a string. For example,
1644 * `g_strreverse ("abcdef")` will result in "fedcba".
1646 * Note that g_strreverse() doesn't work on UTF-8 strings
1647 * containing multibyte characters. For that purpose, use
1648 * g_utf8_strreverse().
1650 * Returns: the same pointer passed in as @string
1652 gchar*
1653 g_strreverse (gchar *string)
1655 g_return_val_if_fail (string != NULL, NULL);
1657 if (*string)
1659 gchar *h, *t;
1661 h = string;
1662 t = string + strlen (string) - 1;
1664 while (h < t)
1666 gchar c;
1668 c = *h;
1669 *h = *t;
1670 h++;
1671 *t = c;
1672 t--;
1676 return string;
1680 * g_ascii_tolower:
1681 * @c: any character
1683 * Convert a character to ASCII lower case.
1685 * Unlike the standard C library tolower() function, this only
1686 * recognizes standard ASCII letters and ignores the locale, returning
1687 * all non-ASCII characters unchanged, even if they are lower case
1688 * letters in a particular character set. Also unlike the standard
1689 * library function, this takes and returns a char, not an int, so
1690 * don't call it on %EOF but no need to worry about casting to #guchar
1691 * before passing a possibly non-ASCII character in.
1693 * Returns: the result of converting @c to lower case. If @c is
1694 * not an ASCII upper case letter, @c is returned unchanged.
1696 gchar
1697 g_ascii_tolower (gchar c)
1699 return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1703 * g_ascii_toupper:
1704 * @c: any character
1706 * Convert a character to ASCII upper case.
1708 * Unlike the standard C library toupper() function, this only
1709 * recognizes standard ASCII letters and ignores the locale, returning
1710 * all non-ASCII characters unchanged, even if they are upper case
1711 * letters in a particular character set. Also unlike the standard
1712 * library function, this takes and returns a char, not an int, so
1713 * don't call it on %EOF but no need to worry about casting to #guchar
1714 * before passing a possibly non-ASCII character in.
1716 * Returns: the result of converting @c to upper case. If @c is not
1717 * an ASCII lower case letter, @c is returned unchanged.
1719 gchar
1720 g_ascii_toupper (gchar c)
1722 return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1726 * g_ascii_digit_value:
1727 * @c: an ASCII character
1729 * Determines the numeric value of a character as a decimal digit.
1730 * Differs from g_unichar_digit_value() because it takes a char, so
1731 * there's no worry about sign extension if characters are signed.
1733 * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1734 * its numeric value. Otherwise, -1.
1737 g_ascii_digit_value (gchar c)
1739 if (g_ascii_isdigit (c))
1740 return c - '0';
1741 return -1;
1745 * g_ascii_xdigit_value:
1746 * @c: an ASCII character.
1748 * Determines the numeric value of a character as a hexidecimal
1749 * digit. Differs from g_unichar_xdigit_value() because it takes
1750 * a char, so there's no worry about sign extension if characters
1751 * are signed.
1753 * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1754 * its numeric value. Otherwise, -1.
1757 g_ascii_xdigit_value (gchar c)
1759 if (c >= 'A' && c <= 'F')
1760 return c - 'A' + 10;
1761 if (c >= 'a' && c <= 'f')
1762 return c - 'a' + 10;
1763 return g_ascii_digit_value (c);
1767 * g_ascii_strcasecmp:
1768 * @s1: string to compare with @s2
1769 * @s2: string to compare with @s1
1771 * Compare two strings, ignoring the case of ASCII characters.
1773 * Unlike the BSD strcasecmp() function, this only recognizes standard
1774 * ASCII letters and ignores the locale, treating all non-ASCII
1775 * bytes as if they are not letters.
1777 * This function should be used only on strings that are known to be
1778 * in encodings where the bytes corresponding to ASCII letters always
1779 * represent themselves. This includes UTF-8 and the ISO-8859-*
1780 * charsets, but not for instance double-byte encodings like the
1781 * Windows Codepage 932, where the trailing bytes of double-byte
1782 * characters include all ASCII letters. If you compare two CP932
1783 * strings using this function, you will get false matches.
1785 * Both @s1 and @s2 must be non-%NULL.
1787 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1788 * or a positive value if @s1 > @s2.
1790 gint
1791 g_ascii_strcasecmp (const gchar *s1,
1792 const gchar *s2)
1794 gint c1, c2;
1796 g_return_val_if_fail (s1 != NULL, 0);
1797 g_return_val_if_fail (s2 != NULL, 0);
1799 while (*s1 && *s2)
1801 c1 = (gint)(guchar) TOLOWER (*s1);
1802 c2 = (gint)(guchar) TOLOWER (*s2);
1803 if (c1 != c2)
1804 return (c1 - c2);
1805 s1++; s2++;
1808 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1812 * g_ascii_strncasecmp:
1813 * @s1: string to compare with @s2
1814 * @s2: string to compare with @s1
1815 * @n: number of characters to compare
1817 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1818 * characters after the first @n in each string.
1820 * Unlike the BSD strcasecmp() function, this only recognizes standard
1821 * ASCII letters and ignores the locale, treating all non-ASCII
1822 * characters as if they are not letters.
1824 * The same warning as in g_ascii_strcasecmp() applies: Use this
1825 * function only on strings known to be in encodings where bytes
1826 * corresponding to ASCII letters always represent themselves.
1828 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1829 * or a positive value if @s1 > @s2.
1831 gint
1832 g_ascii_strncasecmp (const gchar *s1,
1833 const gchar *s2,
1834 gsize n)
1836 gint c1, c2;
1838 g_return_val_if_fail (s1 != NULL, 0);
1839 g_return_val_if_fail (s2 != NULL, 0);
1841 while (n && *s1 && *s2)
1843 n -= 1;
1844 c1 = (gint)(guchar) TOLOWER (*s1);
1845 c2 = (gint)(guchar) TOLOWER (*s2);
1846 if (c1 != c2)
1847 return (c1 - c2);
1848 s1++; s2++;
1851 if (n)
1852 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1853 else
1854 return 0;
1858 * g_strcasecmp:
1859 * @s1: a string
1860 * @s2: a string to compare with @s1
1862 * A case-insensitive string comparison, corresponding to the standard
1863 * strcasecmp() function on platforms which support it.
1865 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1866 * or a positive value if @s1 > @s2.
1868 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1869 * function is deprecated and how to replace it.
1871 gint
1872 g_strcasecmp (const gchar *s1,
1873 const gchar *s2)
1875 #ifdef HAVE_STRCASECMP
1876 g_return_val_if_fail (s1 != NULL, 0);
1877 g_return_val_if_fail (s2 != NULL, 0);
1879 return strcasecmp (s1, s2);
1880 #else
1881 gint c1, c2;
1883 g_return_val_if_fail (s1 != NULL, 0);
1884 g_return_val_if_fail (s2 != NULL, 0);
1886 while (*s1 && *s2)
1888 /* According to A. Cox, some platforms have islower's that
1889 * don't work right on non-uppercase
1891 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1892 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1893 if (c1 != c2)
1894 return (c1 - c2);
1895 s1++; s2++;
1898 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1899 #endif
1903 * g_strncasecmp:
1904 * @s1: a string
1905 * @s2: a string to compare with @s1
1906 * @n: the maximum number of characters to compare
1908 * A case-insensitive string comparison, corresponding to the standard
1909 * strncasecmp() function on platforms which support it. It is similar
1910 * to g_strcasecmp() except it only compares the first @n characters of
1911 * the strings.
1913 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1914 * or a positive value if @s1 > @s2.
1916 * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1917 * the comparison by calling toupper()/tolower(). These functions
1918 * are locale-specific and operate on single bytes. However, it is
1919 * impossible to handle things correctly from an internationalization
1920 * standpoint by operating on bytes, since characters may be multibyte.
1921 * Thus g_strncasecmp() is broken if your string is guaranteed to be
1922 * ASCII, since it is locale-sensitive, and it's broken if your string
1923 * is localized, since it doesn't work on many encodings at all,
1924 * including UTF-8, EUC-JP, etc.
1926 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1927 * which only works on ASCII and is not locale-sensitive, and
1928 * g_utf8_casefold() followed by strcmp() on the resulting strings,
1929 * which is good for case-insensitive sorting of UTF-8.
1931 gint
1932 g_strncasecmp (const gchar *s1,
1933 const gchar *s2,
1934 guint n)
1936 #ifdef HAVE_STRNCASECMP
1937 return strncasecmp (s1, s2, n);
1938 #else
1939 gint c1, c2;
1941 g_return_val_if_fail (s1 != NULL, 0);
1942 g_return_val_if_fail (s2 != NULL, 0);
1944 while (n && *s1 && *s2)
1946 n -= 1;
1947 /* According to A. Cox, some platforms have islower's that
1948 * don't work right on non-uppercase
1950 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1951 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1952 if (c1 != c2)
1953 return (c1 - c2);
1954 s1++; s2++;
1957 if (n)
1958 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1959 else
1960 return 0;
1961 #endif
1965 * g_strdelimit:
1966 * @string: the string to convert
1967 * @delimiters: (nullable): a string containing the current delimiters,
1968 * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
1969 * @new_delimiter: the new delimiter character
1971 * Converts any delimiter characters in @string to @new_delimiter.
1972 * Any characters in @string which are found in @delimiters are
1973 * changed to the @new_delimiter character. Modifies @string in place,
1974 * and returns @string itself, not a copy. The return value is to
1975 * allow nesting such as
1976 * |[<!-- language="C" -->
1977 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
1978 * ]|
1980 * Returns: @string
1982 gchar *
1983 g_strdelimit (gchar *string,
1984 const gchar *delimiters,
1985 gchar new_delim)
1987 gchar *c;
1989 g_return_val_if_fail (string != NULL, NULL);
1991 if (!delimiters)
1992 delimiters = G_STR_DELIMITERS;
1994 for (c = string; *c; c++)
1996 if (strchr (delimiters, *c))
1997 *c = new_delim;
2000 return string;
2004 * g_strcanon:
2005 * @string: a nul-terminated array of bytes
2006 * @valid_chars: bytes permitted in @string
2007 * @substitutor: replacement character for disallowed bytes
2009 * For each character in @string, if the character is not in @valid_chars,
2010 * replaces the character with @substitutor. Modifies @string in place,
2011 * and return @string itself, not a copy. The return value is to allow
2012 * nesting such as
2013 * |[<!-- language="C" -->
2014 * g_ascii_strup (g_strcanon (str, "abc", '?'))
2015 * ]|
2017 * Returns: @string
2019 gchar *
2020 g_strcanon (gchar *string,
2021 const gchar *valid_chars,
2022 gchar substitutor)
2024 gchar *c;
2026 g_return_val_if_fail (string != NULL, NULL);
2027 g_return_val_if_fail (valid_chars != NULL, NULL);
2029 for (c = string; *c; c++)
2031 if (!strchr (valid_chars, *c))
2032 *c = substitutor;
2035 return string;
2039 * g_strcompress:
2040 * @source: a string to compress
2042 * Replaces all escaped characters with their one byte equivalent.
2044 * This function does the reverse conversion of g_strescape().
2046 * Returns: a newly-allocated copy of @source with all escaped
2047 * character compressed
2049 gchar *
2050 g_strcompress (const gchar *source)
2052 const gchar *p = source, *octal;
2053 gchar *dest;
2054 gchar *q;
2056 g_return_val_if_fail (source != NULL, NULL);
2058 dest = g_malloc (strlen (source) + 1);
2059 q = dest;
2061 while (*p)
2063 if (*p == '\\')
2065 p++;
2066 switch (*p)
2068 case '\0':
2069 g_warning ("g_strcompress: trailing \\");
2070 goto out;
2071 case '0': case '1': case '2': case '3': case '4':
2072 case '5': case '6': case '7':
2073 *q = 0;
2074 octal = p;
2075 while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2077 *q = (*q * 8) + (*p - '0');
2078 p++;
2080 q++;
2081 p--;
2082 break;
2083 case 'b':
2084 *q++ = '\b';
2085 break;
2086 case 'f':
2087 *q++ = '\f';
2088 break;
2089 case 'n':
2090 *q++ = '\n';
2091 break;
2092 case 'r':
2093 *q++ = '\r';
2094 break;
2095 case 't':
2096 *q++ = '\t';
2097 break;
2098 case 'v':
2099 *q++ = '\v';
2100 break;
2101 default: /* Also handles \" and \\ */
2102 *q++ = *p;
2103 break;
2106 else
2107 *q++ = *p;
2108 p++;
2110 out:
2111 *q = 0;
2113 return dest;
2117 * g_strescape:
2118 * @source: a string to escape
2119 * @exceptions: (nullable): a string of characters not to escape in @source
2121 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2122 * and '"' in the string @source by inserting a '\' before
2123 * them. Additionally all characters in the range 0x01-0x1F (everything
2124 * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2125 * replaced with a '\' followed by their octal representation.
2126 * Characters supplied in @exceptions are not escaped.
2128 * g_strcompress() does the reverse conversion.
2130 * Returns: a newly-allocated copy of @source with certain
2131 * characters escaped. See above.
2133 gchar *
2134 g_strescape (const gchar *source,
2135 const gchar *exceptions)
2137 const guchar *p;
2138 gchar *dest;
2139 gchar *q;
2140 guchar excmap[256];
2142 g_return_val_if_fail (source != NULL, NULL);
2144 p = (guchar *) source;
2145 /* Each source byte needs maximally four destination chars (\777) */
2146 q = dest = g_malloc (strlen (source) * 4 + 1);
2148 memset (excmap, 0, 256);
2149 if (exceptions)
2151 guchar *e = (guchar *) exceptions;
2153 while (*e)
2155 excmap[*e] = 1;
2156 e++;
2160 while (*p)
2162 if (excmap[*p])
2163 *q++ = *p;
2164 else
2166 switch (*p)
2168 case '\b':
2169 *q++ = '\\';
2170 *q++ = 'b';
2171 break;
2172 case '\f':
2173 *q++ = '\\';
2174 *q++ = 'f';
2175 break;
2176 case '\n':
2177 *q++ = '\\';
2178 *q++ = 'n';
2179 break;
2180 case '\r':
2181 *q++ = '\\';
2182 *q++ = 'r';
2183 break;
2184 case '\t':
2185 *q++ = '\\';
2186 *q++ = 't';
2187 break;
2188 case '\v':
2189 *q++ = '\\';
2190 *q++ = 'v';
2191 break;
2192 case '\\':
2193 *q++ = '\\';
2194 *q++ = '\\';
2195 break;
2196 case '"':
2197 *q++ = '\\';
2198 *q++ = '"';
2199 break;
2200 default:
2201 if ((*p < ' ') || (*p >= 0177))
2203 *q++ = '\\';
2204 *q++ = '0' + (((*p) >> 6) & 07);
2205 *q++ = '0' + (((*p) >> 3) & 07);
2206 *q++ = '0' + ((*p) & 07);
2208 else
2209 *q++ = *p;
2210 break;
2213 p++;
2215 *q = 0;
2216 return dest;
2220 * g_strchug:
2221 * @string: a string to remove the leading whitespace from
2223 * Removes leading whitespace from a string, by moving the rest
2224 * of the characters forward.
2226 * This function doesn't allocate or reallocate any memory;
2227 * it modifies @string in place. Therefore, it cannot be used on
2228 * statically allocated strings.
2230 * The pointer to @string is returned to allow the nesting of functions.
2232 * Also see g_strchomp() and g_strstrip().
2234 * Returns: @string
2236 gchar *
2237 g_strchug (gchar *string)
2239 guchar *start;
2241 g_return_val_if_fail (string != NULL, NULL);
2243 for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2246 memmove (string, start, strlen ((gchar *) start) + 1);
2248 return string;
2252 * g_strchomp:
2253 * @string: a string to remove the trailing whitespace from
2255 * Removes trailing whitespace from a string.
2257 * This function doesn't allocate or reallocate any memory;
2258 * it modifies @string in place. Therefore, it cannot be used
2259 * on statically allocated strings.
2261 * The pointer to @string is returned to allow the nesting of functions.
2263 * Also see g_strchug() and g_strstrip().
2265 * Returns: @string
2267 gchar *
2268 g_strchomp (gchar *string)
2270 gsize len;
2272 g_return_val_if_fail (string != NULL, NULL);
2274 len = strlen (string);
2275 while (len--)
2277 if (g_ascii_isspace ((guchar) string[len]))
2278 string[len] = '\0';
2279 else
2280 break;
2283 return string;
2287 * g_strsplit:
2288 * @string: a string to split
2289 * @delimiter: a string which specifies the places at which to split
2290 * the string. The delimiter is not included in any of the resulting
2291 * strings, unless @max_tokens is reached.
2292 * @max_tokens: the maximum number of pieces to split @string into.
2293 * If this is less than 1, the string is split completely.
2295 * Splits a string into a maximum of @max_tokens pieces, using the given
2296 * @delimiter. If @max_tokens is reached, the remainder of @string is
2297 * appended to the last token.
2299 * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2300 * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2301 * and "".
2303 * As a special case, the result of splitting the empty string "" is an empty
2304 * vector, not a vector containing a single string. The reason for this
2305 * special case is that being able to represent a empty vector is typically
2306 * more useful than consistent handling of empty elements. If you do need
2307 * to represent empty elements, you'll need to check for the empty string
2308 * before calling g_strsplit().
2310 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2311 * g_strfreev() to free it.
2313 gchar**
2314 g_strsplit (const gchar *string,
2315 const gchar *delimiter,
2316 gint max_tokens)
2318 GSList *string_list = NULL, *slist;
2319 gchar **str_array, *s;
2320 guint n = 0;
2321 const gchar *remainder;
2323 g_return_val_if_fail (string != NULL, NULL);
2324 g_return_val_if_fail (delimiter != NULL, NULL);
2325 g_return_val_if_fail (delimiter[0] != '\0', NULL);
2327 if (max_tokens < 1)
2328 max_tokens = G_MAXINT;
2330 remainder = string;
2331 s = strstr (remainder, delimiter);
2332 if (s)
2334 gsize delimiter_len = strlen (delimiter);
2336 while (--max_tokens && s)
2338 gsize len;
2340 len = s - remainder;
2341 string_list = g_slist_prepend (string_list,
2342 g_strndup (remainder, len));
2343 n++;
2344 remainder = s + delimiter_len;
2345 s = strstr (remainder, delimiter);
2348 if (*string)
2350 n++;
2351 string_list = g_slist_prepend (string_list, g_strdup (remainder));
2354 str_array = g_new (gchar*, n + 1);
2356 str_array[n--] = NULL;
2357 for (slist = string_list; slist; slist = slist->next)
2358 str_array[n--] = slist->data;
2360 g_slist_free (string_list);
2362 return str_array;
2366 * g_strsplit_set:
2367 * @string: The string to be tokenized
2368 * @delimiters: A nul-terminated string containing bytes that are used
2369 * to split the string.
2370 * @max_tokens: The maximum number of tokens to split @string into.
2371 * If this is less than 1, the string is split completely
2373 * Splits @string into a number of tokens not containing any of the characters
2374 * in @delimiter. A token is the (possibly empty) longest string that does not
2375 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2376 * remainder is appended to the last token.
2378 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2379 * %NULL-terminated vector containing the three strings "abc", "def",
2380 * and "ghi".
2382 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2383 * vector containing the four strings "", "def", "ghi", and "".
2385 * As a special case, the result of splitting the empty string "" is an empty
2386 * vector, not a vector containing a single string. The reason for this
2387 * special case is that being able to represent a empty vector is typically
2388 * more useful than consistent handling of empty elements. If you do need
2389 * to represent empty elements, you'll need to check for the empty string
2390 * before calling g_strsplit_set().
2392 * Note that this function works on bytes not characters, so it can't be used
2393 * to delimit UTF-8 strings for anything but ASCII characters.
2395 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2396 * g_strfreev() to free it.
2398 * Since: 2.4
2400 gchar **
2401 g_strsplit_set (const gchar *string,
2402 const gchar *delimiters,
2403 gint max_tokens)
2405 gboolean delim_table[256];
2406 GSList *tokens, *list;
2407 gint n_tokens;
2408 const gchar *s;
2409 const gchar *current;
2410 gchar *token;
2411 gchar **result;
2413 g_return_val_if_fail (string != NULL, NULL);
2414 g_return_val_if_fail (delimiters != NULL, NULL);
2416 if (max_tokens < 1)
2417 max_tokens = G_MAXINT;
2419 if (*string == '\0')
2421 result = g_new (char *, 1);
2422 result[0] = NULL;
2423 return result;
2426 memset (delim_table, FALSE, sizeof (delim_table));
2427 for (s = delimiters; *s != '\0'; ++s)
2428 delim_table[*(guchar *)s] = TRUE;
2430 tokens = NULL;
2431 n_tokens = 0;
2433 s = current = string;
2434 while (*s != '\0')
2436 if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2438 token = g_strndup (current, s - current);
2439 tokens = g_slist_prepend (tokens, token);
2440 ++n_tokens;
2442 current = s + 1;
2445 ++s;
2448 token = g_strndup (current, s - current);
2449 tokens = g_slist_prepend (tokens, token);
2450 ++n_tokens;
2452 result = g_new (gchar *, n_tokens + 1);
2454 result[n_tokens] = NULL;
2455 for (list = tokens; list != NULL; list = list->next)
2456 result[--n_tokens] = list->data;
2458 g_slist_free (tokens);
2460 return result;
2464 * GStrv:
2466 * A typedef alias for gchar**. This is mostly useful when used together with
2467 * g_auto().
2471 * g_strfreev:
2472 * @str_array: (nullable): a %NULL-terminated array of strings to free
2474 * Frees a %NULL-terminated array of strings, as well as each
2475 * string it contains.
2477 * If @str_array is %NULL, this function simply returns.
2479 void
2480 g_strfreev (gchar **str_array)
2482 if (str_array)
2484 int i;
2486 for (i = 0; str_array[i] != NULL; i++)
2487 g_free (str_array[i]);
2489 g_free (str_array);
2494 * g_strdupv:
2495 * @str_array: (nullable): a %NULL-terminated array of strings
2497 * Copies %NULL-terminated array of strings. The copy is a deep copy;
2498 * the new array should be freed by first freeing each string, then
2499 * the array itself. g_strfreev() does this for you. If called
2500 * on a %NULL value, g_strdupv() simply returns %NULL.
2502 * Returns: (nullable): a new %NULL-terminated array of strings.
2504 gchar**
2505 g_strdupv (gchar **str_array)
2507 if (str_array)
2509 gint i;
2510 gchar **retval;
2512 i = 0;
2513 while (str_array[i])
2514 ++i;
2516 retval = g_new (gchar*, i + 1);
2518 i = 0;
2519 while (str_array[i])
2521 retval[i] = g_strdup (str_array[i]);
2522 ++i;
2524 retval[i] = NULL;
2526 return retval;
2528 else
2529 return NULL;
2533 * g_strjoinv:
2534 * @separator: (nullable): a string to insert between each of the
2535 * strings, or %NULL
2536 * @str_array: a %NULL-terminated array of strings to join
2538 * Joins a number of strings together to form one long string, with the
2539 * optional @separator inserted between each of them. The returned string
2540 * should be freed with g_free().
2542 * If @str_array has no items, the return value will be an
2543 * empty string. If @str_array contains a single item, @separator will not
2544 * appear in the resulting string.
2546 * Returns: a newly-allocated string containing all of the strings joined
2547 * together, with @separator between them
2549 gchar*
2550 g_strjoinv (const gchar *separator,
2551 gchar **str_array)
2553 gchar *string;
2554 gchar *ptr;
2556 g_return_val_if_fail (str_array != NULL, NULL);
2558 if (separator == NULL)
2559 separator = "";
2561 if (*str_array)
2563 gint i;
2564 gsize len;
2565 gsize separator_len;
2567 separator_len = strlen (separator);
2568 /* First part, getting length */
2569 len = 1 + strlen (str_array[0]);
2570 for (i = 1; str_array[i] != NULL; i++)
2571 len += strlen (str_array[i]);
2572 len += separator_len * (i - 1);
2574 /* Second part, building string */
2575 string = g_new (gchar, len);
2576 ptr = g_stpcpy (string, *str_array);
2577 for (i = 1; str_array[i] != NULL; i++)
2579 ptr = g_stpcpy (ptr, separator);
2580 ptr = g_stpcpy (ptr, str_array[i]);
2583 else
2584 string = g_strdup ("");
2586 return string;
2590 * g_strjoin:
2591 * @separator: (nullable): a string to insert between each of the
2592 * strings, or %NULL
2593 * @...: a %NULL-terminated list of strings to join
2595 * Joins a number of strings together to form one long string, with the
2596 * optional @separator inserted between each of them. The returned string
2597 * should be freed with g_free().
2599 * Returns: a newly-allocated string containing all of the strings joined
2600 * together, with @separator between them
2602 gchar*
2603 g_strjoin (const gchar *separator,
2604 ...)
2606 gchar *string, *s;
2607 va_list args;
2608 gsize len;
2609 gsize separator_len;
2610 gchar *ptr;
2612 if (separator == NULL)
2613 separator = "";
2615 separator_len = strlen (separator);
2617 va_start (args, separator);
2619 s = va_arg (args, gchar*);
2621 if (s)
2623 /* First part, getting length */
2624 len = 1 + strlen (s);
2626 s = va_arg (args, gchar*);
2627 while (s)
2629 len += separator_len + strlen (s);
2630 s = va_arg (args, gchar*);
2632 va_end (args);
2634 /* Second part, building string */
2635 string = g_new (gchar, len);
2637 va_start (args, separator);
2639 s = va_arg (args, gchar*);
2640 ptr = g_stpcpy (string, s);
2642 s = va_arg (args, gchar*);
2643 while (s)
2645 ptr = g_stpcpy (ptr, separator);
2646 ptr = g_stpcpy (ptr, s);
2647 s = va_arg (args, gchar*);
2650 else
2651 string = g_strdup ("");
2653 va_end (args);
2655 return string;
2660 * g_strstr_len:
2661 * @haystack: a string
2662 * @haystack_len: the maximum length of @haystack. Note that -1 is
2663 * a valid length, if @haystack is nul-terminated, meaning it will
2664 * search through the whole string.
2665 * @needle: the string to search for
2667 * Searches the string @haystack for the first occurrence
2668 * of the string @needle, limiting the length of the search
2669 * to @haystack_len.
2671 * Returns: a pointer to the found occurrence, or
2672 * %NULL if not found.
2674 gchar *
2675 g_strstr_len (const gchar *haystack,
2676 gssize haystack_len,
2677 const gchar *needle)
2679 g_return_val_if_fail (haystack != NULL, NULL);
2680 g_return_val_if_fail (needle != NULL, NULL);
2682 if (haystack_len < 0)
2683 return strstr (haystack, needle);
2684 else
2686 const gchar *p = haystack;
2687 gsize needle_len = strlen (needle);
2688 const gchar *end;
2689 gsize i;
2691 if (needle_len == 0)
2692 return (gchar *)haystack;
2694 if (haystack_len < needle_len)
2695 return NULL;
2697 end = haystack + haystack_len - needle_len;
2699 while (p <= end && *p)
2701 for (i = 0; i < needle_len; i++)
2702 if (p[i] != needle[i])
2703 goto next;
2705 return (gchar *)p;
2707 next:
2708 p++;
2711 return NULL;
2716 * g_strrstr:
2717 * @haystack: a nul-terminated string
2718 * @needle: the nul-terminated string to search for
2720 * Searches the string @haystack for the last occurrence
2721 * of the string @needle.
2723 * Returns: a pointer to the found occurrence, or
2724 * %NULL if not found.
2726 gchar *
2727 g_strrstr (const gchar *haystack,
2728 const gchar *needle)
2730 gsize i;
2731 gsize needle_len;
2732 gsize haystack_len;
2733 const gchar *p;
2735 g_return_val_if_fail (haystack != NULL, NULL);
2736 g_return_val_if_fail (needle != NULL, NULL);
2738 needle_len = strlen (needle);
2739 haystack_len = strlen (haystack);
2741 if (needle_len == 0)
2742 return (gchar *)haystack;
2744 if (haystack_len < needle_len)
2745 return NULL;
2747 p = haystack + haystack_len - needle_len;
2749 while (p >= haystack)
2751 for (i = 0; i < needle_len; i++)
2752 if (p[i] != needle[i])
2753 goto next;
2755 return (gchar *)p;
2757 next:
2758 p--;
2761 return NULL;
2765 * g_strrstr_len:
2766 * @haystack: a nul-terminated string
2767 * @haystack_len: the maximum length of @haystack
2768 * @needle: the nul-terminated string to search for
2770 * Searches the string @haystack for the last occurrence
2771 * of the string @needle, limiting the length of the search
2772 * to @haystack_len.
2774 * Returns: a pointer to the found occurrence, or
2775 * %NULL if not found.
2777 gchar *
2778 g_strrstr_len (const gchar *haystack,
2779 gssize haystack_len,
2780 const gchar *needle)
2782 g_return_val_if_fail (haystack != NULL, NULL);
2783 g_return_val_if_fail (needle != NULL, NULL);
2785 if (haystack_len < 0)
2786 return g_strrstr (haystack, needle);
2787 else
2789 gsize needle_len = strlen (needle);
2790 const gchar *haystack_max = haystack + haystack_len;
2791 const gchar *p = haystack;
2792 gsize i;
2794 while (p < haystack_max && *p)
2795 p++;
2797 if (p < haystack + needle_len)
2798 return NULL;
2800 p -= needle_len;
2802 while (p >= haystack)
2804 for (i = 0; i < needle_len; i++)
2805 if (p[i] != needle[i])
2806 goto next;
2808 return (gchar *)p;
2810 next:
2811 p--;
2814 return NULL;
2820 * g_str_has_suffix:
2821 * @str: a nul-terminated string
2822 * @suffix: the nul-terminated suffix to look for
2824 * Looks whether the string @str ends with @suffix.
2826 * Returns: %TRUE if @str end with @suffix, %FALSE otherwise.
2828 * Since: 2.2
2830 gboolean
2831 g_str_has_suffix (const gchar *str,
2832 const gchar *suffix)
2834 int str_len;
2835 int suffix_len;
2837 g_return_val_if_fail (str != NULL, FALSE);
2838 g_return_val_if_fail (suffix != NULL, FALSE);
2840 str_len = strlen (str);
2841 suffix_len = strlen (suffix);
2843 if (str_len < suffix_len)
2844 return FALSE;
2846 return strcmp (str + str_len - suffix_len, suffix) == 0;
2850 * g_str_has_prefix:
2851 * @str: a nul-terminated string
2852 * @prefix: the nul-terminated prefix to look for
2854 * Looks whether the string @str begins with @prefix.
2856 * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2858 * Since: 2.2
2860 gboolean
2861 g_str_has_prefix (const gchar *str,
2862 const gchar *prefix)
2864 g_return_val_if_fail (str != NULL, FALSE);
2865 g_return_val_if_fail (prefix != NULL, FALSE);
2867 return strncmp (str, prefix, strlen (prefix)) == 0;
2871 * g_strv_length:
2872 * @str_array: a %NULL-terminated array of strings
2874 * Returns the length of the given %NULL-terminated
2875 * string array @str_array.
2877 * Returns: length of @str_array.
2879 * Since: 2.6
2881 guint
2882 g_strv_length (gchar **str_array)
2884 guint i = 0;
2886 g_return_val_if_fail (str_array != NULL, 0);
2888 while (str_array[i])
2889 ++i;
2891 return i;
2894 static void
2895 index_add_folded (GPtrArray *array,
2896 const gchar *start,
2897 const gchar *end)
2899 gchar *normal;
2901 normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2903 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
2904 if (strstr (normal, "ı") || strstr (normal, "İ"))
2906 gchar *s = normal;
2907 GString *tmp;
2909 tmp = g_string_new (NULL);
2911 while (*s)
2913 gchar *i, *I, *e;
2915 i = strstr (s, "ı");
2916 I = strstr (s, "İ");
2918 if (!i && !I)
2919 break;
2920 else if (i && !I)
2921 e = i;
2922 else if (I && !i)
2923 e = I;
2924 else if (i < I)
2925 e = i;
2926 else
2927 e = I;
2929 g_string_append_len (tmp, s, e - s);
2930 g_string_append_c (tmp, 'i');
2931 s = g_utf8_next_char (e);
2934 g_string_append (tmp, s);
2935 g_free (normal);
2936 normal = g_string_free (tmp, FALSE);
2939 g_ptr_array_add (array, g_utf8_casefold (normal, -1));
2940 g_free (normal);
2943 static gchar **
2944 split_words (const gchar *value)
2946 const gchar *start = NULL;
2947 GPtrArray *result;
2948 const gchar *s;
2950 result = g_ptr_array_new ();
2952 for (s = value; *s; s = g_utf8_next_char (s))
2954 gunichar c = g_utf8_get_char (s);
2956 if (start == NULL)
2958 if (g_unichar_isalnum (c) || g_unichar_ismark (c))
2959 start = s;
2961 else
2963 if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
2965 index_add_folded (result, start, s);
2966 start = NULL;
2971 if (start)
2972 index_add_folded (result, start, s);
2974 g_ptr_array_add (result, NULL);
2976 return (gchar **) g_ptr_array_free (result, FALSE);
2980 * g_str_tokenize_and_fold:
2981 * @string: a string
2982 * @translit_locale: (nullable): the language code (like 'de' or
2983 * 'en_GB') from which @string originates
2984 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
2985 * return location for ASCII alternates
2987 * Tokenises @string and performs folding on each token.
2989 * A token is a non-empty sequence of alphanumeric characters in the
2990 * source string, separated by non-alphanumeric characters. An
2991 * "alphanumeric" character for this purpose is one that matches
2992 * g_unichar_isalnum() or g_unichar_ismark().
2994 * Each token is then (Unicode) normalised and case-folded. If
2995 * @ascii_alternates is non-%NULL and some of the returned tokens
2996 * contain non-ASCII characters, ASCII alternatives will be generated.
2998 * The number of ASCII alternatives that are generated and the method
2999 * for doing so is unspecified, but @translit_locale (if specified) may
3000 * improve the transliteration if the language of the source string is
3001 * known.
3003 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3005 * Since: 2.40
3007 gchar **
3008 g_str_tokenize_and_fold (const gchar *string,
3009 const gchar *translit_locale,
3010 gchar ***ascii_alternates)
3012 gchar **result;
3014 g_return_val_if_fail (string != NULL, NULL);
3016 if (ascii_alternates && g_str_is_ascii (string))
3018 *ascii_alternates = g_new0 (gchar *, 0 + 1);
3019 ascii_alternates = NULL;
3022 result = split_words (string);
3024 if (ascii_alternates)
3026 gint i, j, n;
3028 n = g_strv_length (result);
3029 *ascii_alternates = g_new (gchar *, n + 1);
3030 j = 0;
3032 for (i = 0; i < n; i++)
3034 if (!g_str_is_ascii (result[i]))
3036 gchar *composed;
3037 gchar *ascii;
3038 gint k;
3040 composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3042 ascii = g_str_to_ascii (composed, translit_locale);
3044 /* Only accept strings that are now entirely alnums */
3045 for (k = 0; ascii[k]; k++)
3046 if (!g_ascii_isalnum (ascii[k]))
3047 break;
3049 if (ascii[k] == '\0')
3050 /* Made it to the end... */
3051 (*ascii_alternates)[j++] = ascii;
3052 else
3053 g_free (ascii);
3055 g_free (composed);
3059 (*ascii_alternates)[j] = NULL;
3062 return result;
3066 * g_str_match_string:
3067 * @search_term: the search term from the user
3068 * @potential_hit: the text that may be a hit
3069 * @accept_alternates: %TRUE to accept ASCII alternates
3071 * Checks if a search conducted for @search_term should match
3072 * @potential_hit.
3074 * This function calls g_str_tokenize_and_fold() on both
3075 * @search_term and @potential_hit. ASCII alternates are never taken
3076 * for @search_term but will be taken for @potential_hit according to
3077 * the value of @accept_alternates.
3079 * A hit occurs when each folded token in @search_term is a prefix of a
3080 * folded token from @potential_hit.
3082 * Depending on how you're performing the search, it will typically be
3083 * faster to call g_str_tokenize_and_fold() on each string in
3084 * your corpus and build an index on the returned folded tokens, then
3085 * call g_str_tokenize_and_fold() on the search term and
3086 * perform lookups into that index.
3088 * As some examples, searching for "fred" would match the potential hit
3089 * "Smith, Fred" and also "Frédéric". Searching for "Fréd" would match
3090 * "Frédéric" but not "Frederic" (due to the one-directional nature of
3091 * accent matching). Searching "fo" would match "Foo" and "Bar Foo
3092 * Baz", but not "SFO" (because no word as "fo" as a prefix).
3094 * Returns: %TRUE if @potential_hit is a hit
3096 * Since: 2.40
3098 gboolean
3099 g_str_match_string (const gchar *search_term,
3100 const gchar *potential_hit,
3101 gboolean accept_alternates)
3103 gchar **alternates = NULL;
3104 gchar **term_tokens;
3105 gchar **hit_tokens;
3106 gboolean matched;
3107 gint i, j;
3109 g_return_val_if_fail (search_term != NULL, FALSE);
3110 g_return_val_if_fail (potential_hit != NULL, FALSE);
3112 term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3113 hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3115 matched = TRUE;
3117 for (i = 0; term_tokens[i]; i++)
3119 for (j = 0; hit_tokens[j]; j++)
3120 if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3121 goto one_matched;
3123 if (accept_alternates)
3124 for (j = 0; alternates[j]; j++)
3125 if (g_str_has_prefix (alternates[j], term_tokens[i]))
3126 goto one_matched;
3128 matched = FALSE;
3129 break;
3131 one_matched:
3132 continue;
3135 g_strfreev (term_tokens);
3136 g_strfreev (hit_tokens);
3137 g_strfreev (alternates);
3139 return matched;
3143 * g_strv_contains:
3144 * @strv: a %NULL-terminated array of strings
3145 * @str: a string
3147 * Checks if @strv contains @str. @strv must not be %NULL.
3149 * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3151 * Since: 2.44
3153 gboolean
3154 g_strv_contains (const gchar * const *strv,
3155 const gchar *str)
3157 g_return_val_if_fail (strv != NULL, FALSE);
3158 g_return_val_if_fail (str != NULL, FALSE);
3160 for (; *strv != NULL; strv++)
3162 if (g_str_equal (str, *strv))
3163 return TRUE;
3166 return FALSE;