4 #include "lib/global.h" /* include glib.h */
/* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c.
 * There are two sorts of functions:
 * 1. functions for working with growing strings and for converting strings
 *    between different encodings
 *    (implemented directly in strutil.c);
 * 2. functions that hide the differences between encodings derived from ASCII
 *    (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c).
 * The documentation is written for the UTF-8 version of the functions.
 */

/* Functions that work with invalid strings are marked with "I".
 * Invalid bytes of a string are handled as one-byte characters of width 1 and
 * are displayed as question marks; I-marked comparing functions try to keep
 * the original value of these bytes.
 */
/* Combining characters
 * displaying: all are handled as zero-width characters, except a combining
 * character at the beginning of a string; that character has width one (a space
 * is added before it), so str_term_width is not suitable for computing the width
 * of single characters (it never returns zero, except for an empty string).
 * For compatibility, strings are composed before displaying.
 * comparing: comparison decomposes all strings before comparing. The n-compare
 * functions do not work as usual, because equal strings do not have to be
 * the same length in UTF-8, so they return 0 if one string is a prefix of the other.
 * str_prefix is used to determine how many characters from one string are
 * a prefix of the second string. However, str_prefix returns the number of
 * characters in decomposed form (used in do_search (screen.c)).
 */
43 /*** typedefs(not structures) and defined constants **********************************************/
/* Bit 0x10 of an alignment value marks its "fit" variant (a string that is too
 * long is truncated with '~'); the low four bits hold the plain alignment. */
#define IS_FIT(x)   ((x) & 0x10)        /* non-zero when the fit bit is set */
#define MAKE_FIT(x) ((x) | 0x10)        /* same alignment with the fit bit set */
#define HIDE_FIT(x) ((x) & 0x0f)        /* same alignment with the fit bit cleared */

/* Value returned by the converter constructors when an encoding is not supported. */
#define INVALID_CONV ((GIConv) (-1))
51 /*** enums ***************************************************************************************/
53 /* results of conversion function
57 /* Success means, that the conversion has been finished successfully
60 /* Problem means, that not every character was successfully converted (some are
61 * replaced with question marks). So it is impossible to convert string back.
64 /* Failure means, that the conversion is not possible (example: wrong encoding
65 * of the input string)
70 /* alignment strings on terminal
77 /* if there is enough space for string on terminal,
78 * string is centered otherwise is aligned to the left */
80 /* fit alignment: if string is too long, truncate with '~' */
84 J_CENTER_LEFT_FIT
= 0x14
87 /* string-to-integer parsing results
93 /* These two values can be ORed together, to indicate that both errors occurred. */
95 LONGINT_INVALID_SUFFIX_CHAR
= 2,
97 LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW
= (LONGINT_INVALID_SUFFIX_CHAR
| LONGINT_OVERFLOW
),
101 /*** structures declarations (and typedefs of structures)*****************************************/
103 /* all functions in str_class must be defined for every encoding */
107 gchar
*(*conv_gerror_message
) (GError
* error
, const char *def_msg
);
108 /*I*/ estr_t (*vfs_convert_to
) (GIConv coder
, const char *string
, int size
, GString
* buffer
);
109 /*I*/ void (*insert_replace_char
) (GString
* buffer
);
110 gboolean (*is_valid_string
) (const char *text
);
111 /*I*/ int (*is_valid_char
) (const char *ch
, size_t size
);
112 /*I*/ void (*cnext_char
) (const char **text
);
113 void (*cprev_char
) (const char **text
);
114 void (*cnext_char_safe
) (const char **text
);
115 /*I*/ void (*cprev_char_safe
) (const char **text
);
116 /*I*/ int (*cnext_noncomb_char
) (const char **text
);
117 /*I*/ int (*cprev_noncomb_char
) (const char **text
, const char *begin
);
118 /*I*/ gboolean (*char_isspace
) (const char *ch
);
119 /*I*/ gboolean (*char_ispunct
) (const char *ch
);
120 /*I*/ gboolean (*char_isalnum
) (const char *ch
);
121 /*I*/ gboolean (*char_isdigit
) (const char *ch
);
122 /*I*/ gboolean (*char_isprint
) (const char *ch
);
123 /*I*/ gboolean (*char_iscombiningmark
) (const char *ch
);
124 /*I*/ int (*length
) (const char *text
);
125 /*I*/ int (*length2
) (const char *text
, int size
);
126 /*I*/ int (*length_noncomb
) (const char *text
);
127 /*I*/ gboolean (*char_toupper
) (const char *ch
, char **out
, size_t * remain
);
128 gboolean (*char_tolower
) (const char *ch
, char **out
, size_t * remain
);
129 void (*fix_string
) (char *text
);
130 /*I*/ const char *(*term_form
) (const char *text
);
131 /*I*/ const char *(*fit_to_term
) (const char *text
, int width
, align_crt_t just_mode
);
132 /*I*/ const char *(*term_trim
) (const char *text
, int width
);
133 /*I*/ const char *(*term_substring
) (const char *text
, int start
, int width
);
134 /*I*/ int (*term_width1
) (const char *text
);
135 /*I*/ int (*term_width2
) (const char *text
, size_t length
);
136 /*I*/ int (*term_char_width
) (const char *length
);
137 /*I*/ const char *(*trunc
) (const char *length
, int width
);
138 /*I*/ int (*offset_to_pos
) (const char *text
, size_t length
);
139 /*I*/ int (*column_to_pos
) (const char *text
, size_t pos
);
140 /*I*/ char *(*create_search_needle
) (const char *needle
, gboolean case_sen
);
141 void (*release_search_needle
) (char *needle
, gboolean case_sen
);
142 const char *(*search_first
) (const char *text
, const char *needle
, gboolean case_sen
);
143 const char *(*search_last
) (const char *text
, const char *needle
, gboolean case_sen
);
144 int (*compare
) (const char *t1
, const char *t2
);
145 /*I*/ int (*ncompare
) (const char *t1
, const char *t2
);
146 /*I*/ int (*casecmp
) (const char *t1
, const char *t2
);
147 /*I*/ int (*ncasecmp
) (const char *t1
, const char *t2
);
148 /*I*/ int (*prefix
) (const char *text
, const char *prefix
);
149 /*I*/ int (*caseprefix
) (const char *text
, const char *prefix
);
150 /*I*/ char *(*create_key
) (const char *text
, gboolean case_sen
);
151 /*I*/ char *(*create_key_for_filename
) (const char *text
, gboolean case_sen
);
152 /*I*/ int (*key_collate
) (const char *t1
, const char *t2
, gboolean case_sen
);
153 /*I*/ void (*release_key
) (char *key
, gboolean case_sen
);
157 /*** global variables defined in .c file *********************************************************/
159 /* standard converters */
160 extern GIConv str_cnv_to_term
;
161 extern GIConv str_cnv_from_term
;
162 /* from terminal encoding to terminal encoding */
163 extern GIConv str_cnv_not_convert
;
165 /*** declarations of public functions ************************************************************/
/* Per-encoding initializers: each returns a struct str_class by value
 * (presumably the function table of the UTF-8, 8-bit or ASCII implementation;
 * confirm against strutilutf8.c, strutil8bit.c and strutilascii.c).
 */
struct str_class str_utf8_init (void);
struct str_class str_8bit_init (void);
struct str_class str_ascii_init (void);
171 /* create converter from "from_enc" to terminal encoding
172 * if "from_enc" is not supported return INVALID_CONV
174 GIConv
str_crt_conv_from (const char *from_enc
);
176 /* create converter from terminal encoding to "to_enc"
177 * if "to_enc" is not supported return INVALID_CONV
179 GIConv
str_crt_conv_to (const char *to_enc
);
181 /* close converter, do not close str_cnv_to_term, str_cnv_from_term,
182 * str_cnv_not_convert
184 void str_close_conv (GIConv conv
);
186 /* return on of not used buffers (.used == 0) or create new
187 * returned buffer has set .used to 1
190 /* convert string using coder, result of conversion is appended at end of buffer
191 * return ESTR_SUCCESS if there was no problem.
192 * otherwise return ESTR_PROBLEM or ESTR_FAILURE
194 estr_t
str_convert (GIConv coder
, const char *string
, GString
* buffer
);
195 estr_t
str_nconvert (GIConv coder
, const char *string
, int size
, GString
* buffer
);
197 /* convert GError message (which in UTF-8) to terminal charset
198 * def_char is used if result of error->str conversion if ESTR_FAILURE
199 * return new allocated null-terminated string, which is need to be freed
202 gchar
*str_conv_gerror_message (GError
* error
, const char *def_msg
);
204 /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert
205 * result to original string. (so no replace with questionmark)
206 * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied,
207 * so is possible to show file, that is not valid in terminal encoding
209 estr_t
str_vfs_convert_from (GIConv coder
, const char *string
, GString
* buffer
);
211 /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied,
212 * does replace with question mark
215 estr_t
str_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
* buffer
);
217 /* printf function for str_buffer, append result of printf at the end of buffer
220 void str_printf (GString
* buffer
, const char *format
, ...) G_GNUC_PRINTF (2, 3);
223 /* add standard replacement character in terminal encoding
225 void str_insert_replace_char (GString
* buffer
);
/* Initialize strings and set the terminal encoding.
 * If termenc is NULL, the terminal encoding is detected.
 * Creates all str_cnv_* converters and sets the functions for the terminal
 * encoding.
 */
void str_init_strings (const char *termenc);

/* Free all str_buffer and all str_cnv_*.
 */
void str_uninit_strings (void);
237 /* try convert characters in ch to output using conv
238 * ch_size is size of ch, can by (size_t)(-1) (-1 only for ASCII
239 * compatible encoding, for other must be set)
240 * return ESTR_SUCCESS if conversion was successfully,
241 * ESTR_PROBLEM if ch contains only part of characters,
242 * ESTR_FAILURE if conversion is not possible
244 estr_t
str_translate_char (GIConv conv
, const char *ch
, size_t ch_size
,
245 char *output
, size_t out_size
);
247 /* test, if text is valid in terminal encoding
250 gboolean
str_is_valid_string (const char *text
);
252 /* test, if first char of ch is valid
253 * size, how many bytes characters occupied, could be (size_t)(-1)
254 * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of
255 * multibyte character
258 int str_is_valid_char (const char *ch
, size_t size
);
/* Return the next character after text; do not call at the end of the string.
 */
char *str_get_next_char (char *text);
const char *str_cget_next_char (const char *text);

/* Return the previous character before text; do not call at the start of the
 * string.
 */
char *str_get_prev_char (char *text);
const char *str_cget_prev_char (const char *text);

/* Set text to the next character; do not call at the end of the string.
 */
void str_next_char (char **text);
void str_cnext_char (const char **text);

/* Set text to the previous character; do not call at the start of the string.
 */
void str_prev_char (char **text);
void str_cprev_char (const char **text);
/* Return the next character after text; do not call at the end of the string.
 * Works with invalid strings.
 */
char *str_get_next_char_safe (char *text);
const char *str_cget_next_char_safe (const char *text);

/* Return the previous character before text; do not call at the start of the
 * string.
 * Works with invalid strings.
 */
char *str_get_prev_char_safe (char *text);
const char *str_cget_prev_char_safe (const char *text);

/* Set text to the next character; do not call at the end of the string.
 * Works with invalid strings.
 */
void str_next_char_safe (char **text);
void str_cnext_char_safe (const char **text);

/* Set text to the previous character; do not call at the start of the string.
 * Works with invalid strings.
 */
void str_prev_char_safe (char **text);
void str_cprev_char_safe (const char **text);
/* Set text to the next non-combining character; checks for the end of text.
 * Returns how many characters were skipped.
 * Works with invalid strings.
 */
int str_next_noncomb_char (char **text);
int str_cnext_noncomb_char (const char **text);

/* Set text to the previous non-combining character; the search stops at begin.
 * Returns how many characters were skipped.
 * Works with invalid strings.
 */
int str_prev_noncomb_char (char **text, const char *begin);
int str_cprev_noncomb_char (const char **text, const char *begin);
324 /* if first characters in ch is space, tabulator or new lines
327 gboolean
str_isspace (const char *ch
);
329 /* if first characters in ch is punctuation or symbol
332 gboolean
str_ispunct (const char *ch
);
334 /* if first characters in ch is alphanum
337 gboolean
str_isalnum (const char *ch
);
339 /* if first characters in ch is digit
342 gboolean
str_isdigit (const char *ch
);
344 /* if first characters in ch is printable
347 gboolean
str_isprint (const char *ch
);
349 /* if first characters in ch is a combining mark (only in utf-8)
350 * combining makrs are assumed to be zero width
353 gboolean
str_iscombiningmark (const char *ch
);
355 /* write lower from of first characters in ch into out
356 * decrease remain by size of returned characters
357 * if out is not big enough, do nothing
359 gboolean
str_toupper (const char *ch
, char **out
, size_t *remain
);
361 /* write upper from of first characters in ch into out
362 * decrease remain by size of returned characters
363 * if out is not big enough, do nothing
365 gboolean
str_tolower (const char *ch
, char **out
, size_t *remain
);
/* Return the length of text in characters.
 */
int str_length (const char *text);

/* Return the length of text in characters, limited to size.
 */
int str_length2 (const char *text, int size);

/* Return the length of one character.
 */
int str_length_char (const char *text);

/* Return the length of text in characters, counting only non-combining
 * characters.
 */
int str_length_noncomb (const char *text);
/* Replace all invalid characters in text with a question mark.
 * After the call, text is a valid string in the terminal encoding.
 */
void str_fix_string (char *text);
393 /* replace all invalid characters in text with questionmark
394 * replace all unprintable characters with '.'
395 * return static allocated string, "text" is not changed
396 * returned string do not need to be freed
399 const char *str_term_form (const char *text
);
401 /* like str_term_form, but text can be alignment to width
402 * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...)
403 * result is completed with spaces to width
406 const char *str_fit_to_term (const char *text
, int width
, align_crt_t just_mode
);
408 /* like str_term_form, but when text is wider than width, three dots are
409 * inserted at begin and result is completed with suffix of text
410 * no additional spaces are inserted
413 const char *str_term_trim (const char *text
, int width
);
416 /* like str_term_form, but return only specified substring
417 * start - column (position) on terminal, where substring begin
418 * result is completed with spaces to width
421 const char *str_term_substring (const char *text
, int start
, int width
);
/* Return the width that text will occupy on the terminal.
 */
int str_term_width1 (const char *text);

/* Return the width that text will occupy on the terminal;
 * text is limited by length, given in characters.
 */
int str_term_width2 (const char *text, size_t length);

/* Return the width that the character will occupy on the terminal.
 * Combining characters are always zero width.
 */
int str_term_char_width (const char *text);

/* Convert a position in characters to a position in bytes.
 */
int str_offset_to_pos (const char *text, size_t length);

/* Convert a position on the terminal to a position in characters.
 */
int str_column_to_pos (const char *text, size_t pos);

/* Like str_fit_to_term with just_mode = J_LEFT_FIT,
 * but does not insert additional spaces.
 */
const char *str_trunc (const char *text, int width);
456 /* create needle, that will be searched in str_search_fist/last,
457 * so needle can be reused
458 * in UTF-8 return normalized form of needle
460 char *str_create_search_needle (const char *needle
, gboolean case_sen
);
462 /* free needle returned by str_create_search_needle
464 void str_release_search_needle (char *needle
, gboolean case_sen
);
466 /* search for first occurrence of search in text
468 const char *str_search_first (const char *text
, const char *needle
, gboolean case_sen
);
470 /* search for last occurrence of search in text
472 const char *str_search_last (const char *text
, const char *needle
, gboolean case_sen
);
/* Case-sensitive comparison of two strings.
 */
int str_compare (const char *t1, const char *t2);

/* Case-sensitive comparison of two strings;
 * if one string is a prefix of the other string, returns 0.
 */
int str_ncompare (const char *t1, const char *t2);

/* Case-insensitive comparison of two strings.
 */
int str_casecmp (const char *t1, const char *t2);

/* Case-insensitive comparison of two strings;
 * if one string is a prefix of the other string, returns 0.
 */
int str_ncasecmp (const char *t1, const char *t2);
/* Return how many bytes are the same from the start in text and prefix.
 * Both strings are decomposed before comparing and the return value is counted
 * in decomposed form, too. Calling it with (prefix, prefix) gives the size in
 * bytes of prefix in decomposed form.
 */
int str_prefix (const char *text, const char *prefix);

/* Case-insensitive version of str_prefix.
 */
int str_caseprefix (const char *text, const char *prefix);
509 /* create a key that is used by str_key_collate
512 char *str_create_key (const char *text
, gboolean case_sen
);
514 /* create a key that is used by str_key_collate
515 * should aware dot '.' in text
518 char *str_create_key_for_filename (const char *text
, gboolean case_sen
);
520 /* compare two string using LC_COLLATE, if is possible
521 * if case_sen is set, comparing is case sensitive,
522 * case_sen must be same for str_create_key, str_key_collate and str_release_key
525 int str_key_collate (const char *t1
, const char *t2
, gboolean case_sen
);
527 /* release_key created by str_create_key, only right way to release key
530 void str_release_key (char *key
, gboolean case_sen
);
532 /* return TRUE if codeset_name is utf8 or utf-8
535 gboolean
str_isutf8 (const char *codeset_name
);
537 const char *str_detect_termencoding (void);
539 int str_verscmp (const char *s1
, const char *s2
);
/* Like filevercmp, except compare the byte arrays a (of length alen) and b
   (of length blen) so that a and b can contain '\0', which sorts just before
   '\1'. But if alen is -1 treat a as a string terminated by '\0', and
   similarly for blen.
 */
int filenvercmp (char const *a, ssize_t alen, char const *b, ssize_t blen);
/* Report, through lines and columns, how many lines and columns text will
 * occupy on the terminal.
 */
void str_msg_term_size (const char *text, int *lines, int *columns);
/**
 * Skip the first needles in haystack.
 *
 * @param haystack pointer to string
 * @param needle pointer to string
 * @param skip_count skip first bytes
 *
 * @return pointer to the skip_count+1 needle (or NULL if not found).
 */
char *strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count);
564 char *str_replace_all (const char *haystack
, const char *needle
, const char *replacement
);
566 GPtrArray
*str_tokenize (const char *string
);
568 strtol_error_t
xstrtoumax (const char *nptr
, char **endptr
, int base
, uintmax_t * val
,
569 const char *valid_suffixes
);
570 uintmax_t parse_integer (const char *str
, gboolean
* invalid
);
572 char *str_escape (const char *src
, gsize src_len
, const char *escaped_chars
,
573 gboolean escape_non_printable
);
574 char *str_unescape (const char *src
, gsize src_len
, const char *unescaped_chars
,
575 gboolean unescape_non_printable
);
576 char *str_shell_unescape (const char *text
);
577 char *str_shell_escape (const char *text
);
579 char *str_glob_escape (const char *text
);
580 char *str_glob_unescape (const char *text
);
582 char *str_regex_escape (const char *text
);
583 char *str_regex_unescape (const char *text
);
585 gboolean
str_is_char_escaped (const char *start
, const char *current
);
587 /* --------------------------------------------------------------------------------------------- */
588 /*** inline functions ****************************************************************************/
589 /* --------------------------------------------------------------------------------------------- */
/**
 * Replace, in place, every occurrence of one character in a string.
 *
 * @param s    null-terminated string to modify
 * @param from character to look for
 * @param to   character written in place of each "from"
 */
static inline void
str_replace (char *s, char from, char to)
{
    /* walk up to, but not including, the terminating '\0' */
    for (; *s != '\0'; s++)
    {
        if (*s == from)
            *s = to;
    }
}
601 /* --------------------------------------------------------------------------------------------- */
/**
 * strcpy is unsafe on overlapping memory areas, so define a memmove-alike
 * string function.
 *
 * Makes sense only when:
 *  * dest <= src (asserted below)
 *   AND
 *  * dest and src are pointers into one object (as Roland Illig pointed out).
 *
 * We can't use str*cpy functions here:
 * http://kerneltrap.org/mailarchive/openbsd-misc/2008/5/27/1951294
 *
 * @param dest pointer to string
 * @param src pointer to string
 *
 * @return the result of memmove (), i.e. dest
 */
static inline char *
str_move (char *dest, const char *src)
{
    size_t bytes;

    g_assert (dest <= src);

    /* the terminating '\0' is moved together with the text */
    bytes = strlen (src) + 1;

    return (char *) memmove (dest, src, bytes);
}
632 /* --------------------------------------------------------------------------------------------- */
/* Compare version strings:

   Compare strings s1 and s2 as file names containing version numbers, and return an integer
   that is negative, zero, or positive depending on whether s1 compares less than, equal to,
   or greater than s2.

   Use the following version sort algorithm:

   1. Compare the strings' maximal-length non-digit prefixes lexically.
   If there is a difference return that difference.
   Otherwise discard the prefixes and continue with the next step.

   2. Compare the strings' maximal-length digit prefixes, using numeric comparison
   of the numbers represented by each prefix. (Treat an empty prefix as zero; this can
   happen only at string end.)
   If there is a difference, return that difference.
   Otherwise discard the prefixes and continue with the next step.

   3. If both strings are empty, return 0. Otherwise continue with step 1.

   In version sort, lexical comparison is left to right, byte by byte, using the byte's numeric
   value (0-255), except that:

   1. ASCII letters sort before other bytes.
   2. A tilde sorts before anything, even an empty string.

   In addition to the version sort rules, the following strings have special priority and sort
   before all other strings (listed in order):

   1. The empty string.
   2. ".".
   3. "..".
   4. Strings starting with "." sort before other strings.

   Before comparing two strings where both begin with non-".", or where both begin with "."
   but neither is "." or "..", suffixes matching the C-locale extended regular expression
   (\.[A-Za-z~][A-Za-z0-9~]*)*$ are removed and the strings compared without them, using version sort
   without special priority; if they do not compare equal, this comparison result is used and
   the suffixes are effectively ignored. Otherwise, the entire strings are compared using version sort.
   When removing a suffix from a nonempty string, remove the maximal-length suffix such that
   the remaining string is nonempty.
 */
static inline int
filevercmp (const char *s1, const char *s2)
{
    /* a length of -1 makes filenvercmp treat the argument as a '\0'-terminated string */
    const int order = filenvercmp (s1, -1, s2, -1);

    return order;
}
682 /* --------------------------------------------------------------------------------------------- */
684 #endif /* MC_STRUTIL_H */