Update translations from Transifex
[midnight-commander.git] / lib / strutil / strutil.c
blob a86598ce8c2cfb75268cc36e50485e77392040c7
1 /*
2 Common strings utilities
4 Copyright (C) 2007-2025
5 Free Software Foundation, Inc.
7 Written by:
8 Rostislav Benes, 2007
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include <config.h>
28 #include <stdlib.h>
29 #include <langinfo.h>
30 #include <string.h>
31 #include <errno.h>
33 #include "lib/global.h"
34 #include "lib/util.h" /* MC_PTR_FREE */
35 #include "lib/strutil.h"
37 /*** global variables ****************************************************************************/
39 GIConv str_cnv_to_term;
40 GIConv str_cnv_from_term;
41 GIConv str_cnv_not_convert = INVALID_CONV;
43 /*** file scope macro definitions ****************************************************************/
45 /*** file scope type declarations ****************************************************************/
47 /*** forward declarations (file scope functions) *************************************************/
49 /*** file scope variables ************************************************************************/
/* Name prefixes that identify a UTF-8 codeset (compared ASCII case-insensitively
   by str_test_encoding_class ()); NULL-terminated. */
static const char *const str_utf8_encodings[] = {
    "utf-8",
    "utf8",
    NULL
};
/* Name prefixes of standard 8-bit encodings (no wide or multibyte characters);
   NULL-terminated. */
static const char *const str_8bit_encodings[] = {
    "cp-1251",
    "cp1251",
    /* Solaris */
    "ansi-1251",
    "ansi1251",
    "cp-1250",
    "cp1250",
    "cp-866",
    "cp866",
    /* glibc */
    "ibm-866",
    "ibm866",
    "cp-850",
    "cp850",
    "cp-852",
    "cp852",
    "iso-8859",
    "iso8859",
    "koi8",
    NULL
};
82 /* terminal encoding */
83 static char *codeset = NULL;
84 static char *term_encoding = NULL;
85 /* function for encoding specific operations */
86 static struct str_class used_class;
88 /* --------------------------------------------------------------------------------------------- */
89 /*** file scope functions ************************************************************************/
90 /* --------------------------------------------------------------------------------------------- */
92 /* if enc is same encoding like on terminal */
93 static int
94 str_test_not_convert (const char *enc)
96 return g_ascii_strcasecmp (enc, codeset) == 0;
99 /* --------------------------------------------------------------------------------------------- */
101 static estr_t
102 _str_convert (GIConv coder, const char *string, int size, GString *buffer)
104 estr_t state = ESTR_SUCCESS;
105 gssize left;
106 gsize bytes_read = 0;
107 gsize bytes_written = 0;
109 errno = 0; /* FIXME: is it really needed? */
111 if (coder == INVALID_CONV)
112 return ESTR_FAILURE;
114 if (string == NULL || buffer == NULL)
115 return ESTR_FAILURE;
118 if (! used_class.is_valid_string (string))
120 return ESTR_FAILURE;
123 if (size < 0)
124 size = strlen (string);
125 else
127 left = strlen (string);
128 if (left < size)
129 size = left;
132 left = size;
133 g_iconv (coder, NULL, NULL, NULL, NULL);
135 while (left != 0)
137 gchar *tmp_buff;
138 GError *mcerror = NULL;
140 tmp_buff = g_convert_with_iconv ((const gchar *) string,
141 left, coder, &bytes_read, &bytes_written, &mcerror);
142 if (mcerror != NULL)
144 int code = mcerror->code;
146 g_error_free (mcerror);
147 mcerror = NULL;
149 switch (code)
151 case G_CONVERT_ERROR_NO_CONVERSION:
152 /* Conversion between the requested character sets is not supported. */
153 g_free (tmp_buff);
154 mc_g_string_append_c_len (buffer, '?', strlen (string));
155 return ESTR_FAILURE;
157 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
158 /* Invalid byte sequence in conversion input. */
159 if ((tmp_buff == NULL) && (bytes_read != 0))
160 /* recode valid byte sequence */
161 tmp_buff = g_convert_with_iconv ((const gchar *) string,
162 bytes_read, coder, NULL, NULL, NULL);
164 if (tmp_buff != NULL)
166 g_string_append (buffer, tmp_buff);
167 g_free (tmp_buff);
170 if ((int) bytes_read >= left)
171 return ESTR_PROBLEM;
173 string += bytes_read + 1;
174 size -= (bytes_read + 1);
175 left -= (bytes_read + 1);
176 g_string_append_c (buffer, *(string - 1));
177 state = ESTR_PROBLEM;
178 break;
180 case G_CONVERT_ERROR_PARTIAL_INPUT:
181 /* Partial character sequence at end of input. */
182 g_string_append (buffer, tmp_buff);
183 g_free (tmp_buff);
184 if ((int) bytes_read < left)
185 mc_g_string_append_c_len (buffer, '?', left - bytes_read);
186 return ESTR_PROBLEM;
188 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
189 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
190 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
191 default:
192 g_free (tmp_buff);
193 return ESTR_FAILURE;
196 else if (tmp_buff == NULL)
198 g_string_append (buffer, string);
199 return ESTR_PROBLEM;
201 else if (*tmp_buff == '\0')
203 g_free (tmp_buff);
204 g_string_append (buffer, string);
205 return state;
207 else
209 g_string_append (buffer, tmp_buff);
210 g_free (tmp_buff);
211 string += bytes_read;
212 left -= bytes_read;
216 return state;
219 /* --------------------------------------------------------------------------------------------- */
/* Count the entries of the NULL-terminated TABLE that are an ASCII case-insensitive
   prefix of ENCODING. Returns 0 when ENCODING is NULL. */
static int
str_test_encoding_class (const char *encoding, const char *const *table)
{
    const char *const *entry;
    int matches = 0;

    if (encoding == NULL)
        return 0;

    for (entry = table; *entry != NULL; entry++)
    {
        const size_t prefix_len = strlen (*entry);

        if (g_ascii_strncasecmp (encoding, *entry, prefix_len) == 0)
            matches++;
    }

    return matches;
}
238 /* --------------------------------------------------------------------------------------------- */
240 static void
241 str_choose_str_functions (void)
243 if (str_test_encoding_class (codeset, str_utf8_encodings))
244 used_class = str_utf8_init ();
245 else if (str_test_encoding_class (codeset, str_8bit_encodings))
246 used_class = str_8bit_init ();
247 else
248 used_class = str_ascii_init ();
251 /* --------------------------------------------------------------------------------------------- */
252 /*** public functions ****************************************************************************/
253 /* --------------------------------------------------------------------------------------------- */
255 GIConv
256 str_crt_conv_to (const char *to_enc)
258 return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
261 /* --------------------------------------------------------------------------------------------- */
263 GIConv
264 str_crt_conv_from (const char *from_enc)
266 return (!str_test_not_convert (from_enc))
267 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
270 /* --------------------------------------------------------------------------------------------- */
272 void
273 str_close_conv (GIConv conv)
275 if (conv != INVALID_CONV && conv != str_cnv_not_convert)
276 g_iconv_close (conv);
279 /* --------------------------------------------------------------------------------------------- */
281 estr_t
282 str_convert (GIConv coder, const char *string, GString *buffer)
284 return _str_convert (coder, string, -1, buffer);
287 /* --------------------------------------------------------------------------------------------- */
289 estr_t
290 str_nconvert (GIConv coder, const char *string, int size, GString *buffer)
292 return _str_convert (coder, string, size, buffer);
295 /* --------------------------------------------------------------------------------------------- */
297 gchar *
298 str_conv_gerror_message (GError *mcerror, const char *def_msg)
300 return used_class.conv_gerror_message (mcerror, def_msg);
303 /* --------------------------------------------------------------------------------------------- */
305 estr_t
306 str_vfs_convert_from (GIConv coder, const char *string, GString *buffer)
308 estr_t result = ESTR_SUCCESS;
310 if (coder == str_cnv_not_convert)
311 g_string_append (buffer, string != NULL ? string : "");
312 else
313 result = _str_convert (coder, string, -1, buffer);
315 return result;
318 /* --------------------------------------------------------------------------------------------- */
320 estr_t
321 str_vfs_convert_to (GIConv coder, const char *string, int size, GString *buffer)
323 return used_class.vfs_convert_to (coder, string, size, buffer);
326 /* --------------------------------------------------------------------------------------------- */
328 void
329 str_printf (GString *buffer, const char *format, ...)
331 va_list ap;
332 va_start (ap, format);
334 g_string_append_vprintf (buffer, format, ap);
335 va_end (ap);
338 /* --------------------------------------------------------------------------------------------- */
340 void
341 str_insert_replace_char (GString *buffer)
343 used_class.insert_replace_char (buffer);
346 /* --------------------------------------------------------------------------------------------- */
348 estr_t
349 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
351 size_t left;
352 size_t cnv;
354 g_iconv (conv, NULL, NULL, NULL, NULL);
356 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
358 cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
359 if (cnv == (size_t) (-1))
360 return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
362 output[0] = '\0';
363 return ESTR_SUCCESS;
366 /* --------------------------------------------------------------------------------------------- */
368 const char *
369 str_detect_termencoding (void)
371 if (term_encoding == NULL)
373 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
374 to utf-8 or UTF-8.
375 On Mac OS X, it returns the same case as the LANG input.
376 So let transform result of nl_langinfo (CODESET) to upper case unconditionally. */
377 term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
380 return term_encoding;
383 /* --------------------------------------------------------------------------------------------- */
385 gboolean
386 str_isutf8 (const char *codeset_name)
388 return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
391 /* --------------------------------------------------------------------------------------------- */
393 void
394 str_init_strings (const char *termenc)
396 codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
398 str_cnv_not_convert = g_iconv_open (codeset, codeset);
399 if (str_cnv_not_convert == INVALID_CONV)
401 if (termenc != NULL)
403 g_free (codeset);
404 codeset = g_strdup (str_detect_termencoding ());
405 str_cnv_not_convert = g_iconv_open (codeset, codeset);
408 if (str_cnv_not_convert == INVALID_CONV)
410 g_free (codeset);
411 codeset = g_strdup (DEFAULT_CHARSET);
412 str_cnv_not_convert = g_iconv_open (codeset, codeset);
416 str_cnv_to_term = str_cnv_not_convert;
417 str_cnv_from_term = str_cnv_not_convert;
419 str_choose_str_functions ();
422 /* --------------------------------------------------------------------------------------------- */
424 void
425 str_uninit_strings (void)
427 if (str_cnv_not_convert != INVALID_CONV)
428 g_iconv_close (str_cnv_not_convert);
429 /* NULL-ize pointers to avoid double free in unit tests */
430 MC_PTR_FREE (term_encoding);
431 MC_PTR_FREE (codeset);
434 /* --------------------------------------------------------------------------------------------- */
436 const char *
437 str_term_form (const char *text)
439 return used_class.term_form (text);
442 /* --------------------------------------------------------------------------------------------- */
444 const char *
445 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
447 return used_class.fit_to_term (text, width, just_mode);
450 /* --------------------------------------------------------------------------------------------- */
452 const char *
453 str_term_trim (const char *text, int width)
455 return used_class.term_trim (text, width);
458 /* --------------------------------------------------------------------------------------------- */
460 const char *
461 str_term_substring (const char *text, int start, int width)
463 return used_class.term_substring (text, start, width);
466 /* --------------------------------------------------------------------------------------------- */
468 char *
469 str_get_next_char (char *text)
472 used_class.cnext_char ((const char **) &text);
473 return text;
476 /* --------------------------------------------------------------------------------------------- */
478 const char *
479 str_cget_next_char (const char *text)
481 used_class.cnext_char (&text);
482 return text;
485 /* --------------------------------------------------------------------------------------------- */
487 void
488 str_next_char (char **text)
490 used_class.cnext_char ((const char **) text);
493 /* --------------------------------------------------------------------------------------------- */
495 void
496 str_cnext_char (const char **text)
498 used_class.cnext_char (text);
501 /* --------------------------------------------------------------------------------------------- */
503 char *
504 str_get_prev_char (char *text)
506 used_class.cprev_char ((const char **) &text);
507 return text;
510 /* --------------------------------------------------------------------------------------------- */
512 const char *
513 str_cget_prev_char (const char *text)
515 used_class.cprev_char (&text);
516 return text;
519 /* --------------------------------------------------------------------------------------------- */
521 void
522 str_prev_char (char **text)
524 used_class.cprev_char ((const char **) text);
527 /* --------------------------------------------------------------------------------------------- */
529 void
530 str_cprev_char (const char **text)
532 used_class.cprev_char (text);
535 /* --------------------------------------------------------------------------------------------- */
537 char *
538 str_get_next_char_safe (char *text)
540 used_class.cnext_char_safe ((const char **) &text);
541 return text;
544 /* --------------------------------------------------------------------------------------------- */
546 const char *
547 str_cget_next_char_safe (const char *text)
549 used_class.cnext_char_safe (&text);
550 return text;
553 /* --------------------------------------------------------------------------------------------- */
555 void
556 str_next_char_safe (char **text)
558 used_class.cnext_char_safe ((const char **) text);
561 /* --------------------------------------------------------------------------------------------- */
563 void
564 str_cnext_char_safe (const char **text)
566 used_class.cnext_char_safe (text);
569 /* --------------------------------------------------------------------------------------------- */
571 char *
572 str_get_prev_char_safe (char *text)
574 used_class.cprev_char_safe ((const char **) &text);
575 return text;
578 /* --------------------------------------------------------------------------------------------- */
580 const char *
581 str_cget_prev_char_safe (const char *text)
583 used_class.cprev_char_safe (&text);
584 return text;
587 /* --------------------------------------------------------------------------------------------- */
589 void
590 str_prev_char_safe (char **text)
592 used_class.cprev_char_safe ((const char **) text);
595 /* --------------------------------------------------------------------------------------------- */
597 void
598 str_cprev_char_safe (const char **text)
600 used_class.cprev_char_safe (text);
603 /* --------------------------------------------------------------------------------------------- */
606 str_next_noncomb_char (char **text)
608 return used_class.cnext_noncomb_char ((const char **) text);
611 /* --------------------------------------------------------------------------------------------- */
614 str_cnext_noncomb_char (const char **text)
616 return used_class.cnext_noncomb_char (text);
619 /* --------------------------------------------------------------------------------------------- */
622 str_prev_noncomb_char (char **text, const char *begin)
624 return used_class.cprev_noncomb_char ((const char **) text, begin);
627 /* --------------------------------------------------------------------------------------------- */
630 str_cprev_noncomb_char (const char **text, const char *begin)
632 return used_class.cprev_noncomb_char (text, begin);
635 /* --------------------------------------------------------------------------------------------- */
638 str_is_valid_char (const char *ch, size_t size)
640 return used_class.is_valid_char (ch, size);
643 /* --------------------------------------------------------------------------------------------- */
646 str_term_width1 (const char *text)
648 return used_class.term_width1 (text);
651 /* --------------------------------------------------------------------------------------------- */
654 str_term_width2 (const char *text, size_t length)
656 return used_class.term_width2 (text, length);
659 /* --------------------------------------------------------------------------------------------- */
662 str_term_char_width (const char *text)
664 return used_class.term_char_width (text);
667 /* --------------------------------------------------------------------------------------------- */
670 str_offset_to_pos (const char *text, size_t length)
672 return used_class.offset_to_pos (text, length);
675 /* --------------------------------------------------------------------------------------------- */
678 str_length (const char *text)
680 return used_class.length (text);
683 /* --------------------------------------------------------------------------------------------- */
/* Byte distance between TEXT and the position returned by
   str_cget_next_char_safe (text). */
int
str_length_char (const char *text)
{
    const char *next;

    next = str_cget_next_char_safe (text);
    return next - text;
}
691 /* --------------------------------------------------------------------------------------------- */
694 str_length2 (const char *text, int size)
696 return used_class.length2 (text, size);
699 /* --------------------------------------------------------------------------------------------- */
702 str_length_noncomb (const char *text)
704 return used_class.length_noncomb (text);
707 /* --------------------------------------------------------------------------------------------- */
710 str_column_to_pos (const char *text, size_t pos)
712 return used_class.column_to_pos (text, pos);
715 /* --------------------------------------------------------------------------------------------- */
717 gboolean
718 str_isspace (const char *ch)
720 return used_class.char_isspace (ch);
723 /* --------------------------------------------------------------------------------------------- */
725 gboolean
726 str_ispunct (const char *ch)
728 return used_class.char_ispunct (ch);
731 /* --------------------------------------------------------------------------------------------- */
733 gboolean
734 str_isalnum (const char *ch)
736 return used_class.char_isalnum (ch);
739 /* --------------------------------------------------------------------------------------------- */
741 gboolean
742 str_isdigit (const char *ch)
744 return used_class.char_isdigit (ch);
747 /* --------------------------------------------------------------------------------------------- */
749 gboolean
750 str_toupper (const char *ch, char **out, size_t *remain)
752 return used_class.char_toupper (ch, out, remain);
755 /* --------------------------------------------------------------------------------------------- */
757 gboolean
758 str_tolower (const char *ch, char **out, size_t *remain)
760 return used_class.char_tolower (ch, out, remain);
763 /* --------------------------------------------------------------------------------------------- */
765 gboolean
766 str_isprint (const char *ch)
768 return used_class.char_isprint (ch);
771 /* --------------------------------------------------------------------------------------------- */
773 gboolean
774 str_iscombiningmark (const char *ch)
776 return used_class.char_iscombiningmark (ch);
779 /* --------------------------------------------------------------------------------------------- */
781 const char *
782 str_trunc (const char *text, int width)
784 return used_class.trunc (text, width);
787 /* --------------------------------------------------------------------------------------------- */
789 char *
790 str_create_search_needle (const char *needle, gboolean case_sen)
792 return used_class.create_search_needle (needle, case_sen);
795 /* --------------------------------------------------------------------------------------------- */
797 void
798 str_release_search_needle (char *needle, gboolean case_sen)
800 used_class.release_search_needle (needle, case_sen);
803 /* --------------------------------------------------------------------------------------------- */
805 const char *
806 str_search_first (const char *text, const char *search, gboolean case_sen)
808 return used_class.search_first (text, search, case_sen);
811 /* --------------------------------------------------------------------------------------------- */
813 const char *
814 str_search_last (const char *text, const char *search, gboolean case_sen)
816 return used_class.search_last (text, search, case_sen);
819 /* --------------------------------------------------------------------------------------------- */
821 gboolean
822 str_is_valid_string (const char *text)
824 return used_class.is_valid_string (text);
827 /* --------------------------------------------------------------------------------------------- */
830 str_compare (const char *t1, const char *t2)
832 return used_class.compare (t1, t2);
835 /* --------------------------------------------------------------------------------------------- */
838 str_ncompare (const char *t1, const char *t2)
840 return used_class.ncompare (t1, t2);
843 /* --------------------------------------------------------------------------------------------- */
846 str_casecmp (const char *t1, const char *t2)
848 return used_class.casecmp (t1, t2);
851 /* --------------------------------------------------------------------------------------------- */
854 str_ncasecmp (const char *t1, const char *t2)
856 return used_class.ncasecmp (t1, t2);
859 /* --------------------------------------------------------------------------------------------- */
862 str_prefix (const char *text, const char *prefix)
864 return used_class.prefix (text, prefix);
867 /* --------------------------------------------------------------------------------------------- */
870 str_caseprefix (const char *text, const char *prefix)
872 return used_class.caseprefix (text, prefix);
875 /* --------------------------------------------------------------------------------------------- */
877 void
878 str_fix_string (char *text)
880 used_class.fix_string (text);
883 /* --------------------------------------------------------------------------------------------- */
885 char *
886 str_create_key (const char *text, gboolean case_sen)
888 return used_class.create_key (text, case_sen);
891 /* --------------------------------------------------------------------------------------------- */
893 char *
894 str_create_key_for_filename (const char *text, gboolean case_sen)
896 return used_class.create_key_for_filename (text, case_sen);
899 /* --------------------------------------------------------------------------------------------- */
902 str_key_collate (const char *t1, const char *t2, gboolean case_sen)
904 return used_class.key_collate (t1, t2, case_sen);
907 /* --------------------------------------------------------------------------------------------- */
909 void
910 str_release_key (char *key, gboolean case_sen)
912 used_class.release_key (key, case_sen);
915 /* --------------------------------------------------------------------------------------------- */
917 void
918 str_msg_term_size (const char *text, int *lines, int *columns)
920 char *p, *tmp;
921 char *q;
922 char c = '\0';
924 *lines = 1;
925 *columns = 0;
927 tmp = g_strdup (text);
928 p = tmp;
930 while (TRUE)
932 int width;
934 q = strchr (p, '\n');
935 if (q != NULL)
937 c = q[0];
938 q[0] = '\0';
941 width = str_term_width1 (p);
942 if (width > *columns)
943 *columns = width;
945 if (q == NULL)
946 break;
948 q[0] = c;
949 p = q + 1;
950 (*lines)++;
953 g_free (tmp);
956 /* --------------------------------------------------------------------------------------------- */
958 char *
959 strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
961 char *semi;
962 ssize_t len;
964 len = strlen (haystack);
968 semi = g_strrstr_len (haystack, len, needle);
969 if (semi == NULL)
970 return NULL;
971 len = semi - haystack - 1;
973 while (skip_count-- != 0);
975 return semi;
978 /* --------------------------------------------------------------------------------------------- */
979 /* Interpret string as a non-negative decimal integer, optionally multiplied by various values.
981 * @param str input value
982 * @param invalid set to TRUE if "str" does not represent a number in this format
984 * @return non-negative integer representation of "str", 0 in case of error.
987 uintmax_t
988 parse_integer (const char *str, gboolean *invalid)
990 uintmax_t n;
991 char *suffix;
992 strtol_error_t e;
994 e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
995 if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
997 uintmax_t multiplier;
999 multiplier = parse_integer (suffix + 1, invalid);
1000 if (multiplier != 0 && n * multiplier / multiplier != n)
1002 *invalid = TRUE;
1003 return 0;
1006 n *= multiplier;
1008 else if (e != LONGINT_OK)
1010 *invalid = TRUE;
1011 n = 0;
1014 return n;
1017 /* --------------------------------------------------------------------------------------------- */