Ticket #1536: fix of case insensitive file sorting in UTF-8 locales.
[kaloumi3.git] / src / strutil8bit.c
blob83f5d20d43cc7a86b7035019b4de45d96909ab55
1 /* 8bit strings utilities
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 Written 2007 by:
5 Rostislav Benes
7 The file_date routine is mostly from GNU's fileutils package,
8 written by Richard Stallman and David MacKenzie.
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 #include <config.h>
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include "global.h"
30 #include "strutil.h"
/* Functions for single-byte encodings: every character has width 1 and
 * the standard system functions are used.
 * There are only small differences between the functions in strutil8bit.c
 * and those in strutilascii.c.
 */
/* Replacement character appended by str_8bit_insert_replace_char (). */
static const char replch = '?';
/*
 * Inlines to equalize 'char' signedness for single 'char' encodings.
 * Instead of writing
 *    isspace ((unsigned char) c);
 * you can write
 *    char_isspace (c);
 */
/*
 * Generate char_<fn> () for the <ctype.h> function <fn>.  The argument is
 * converted to unsigned char before the call, so the library never sees a
 * negative value when plain 'char' is signed.
 */
#define DECLARE_CTYPE_WRAPPER(ctype_fn) \
static inline int char_##ctype_fn (char ch) \
{ \
    const unsigned char byte = (unsigned char) ch; \
    return ctype_fn ((int) byte); \
}

/* classification wrappers */
DECLARE_CTYPE_WRAPPER(isalnum)
DECLARE_CTYPE_WRAPPER(isalpha)
DECLARE_CTYPE_WRAPPER(isascii)
DECLARE_CTYPE_WRAPPER(isblank)
DECLARE_CTYPE_WRAPPER(iscntrl)
DECLARE_CTYPE_WRAPPER(isdigit)
DECLARE_CTYPE_WRAPPER(isgraph)
DECLARE_CTYPE_WRAPPER(islower)
DECLARE_CTYPE_WRAPPER(isprint)
DECLARE_CTYPE_WRAPPER(ispunct)
DECLARE_CTYPE_WRAPPER(isspace)
DECLARE_CTYPE_WRAPPER(isupper)
DECLARE_CTYPE_WRAPPER(isxdigit)

/* case conversion wrappers */
DECLARE_CTYPE_WRAPPER(toupper)
DECLARE_CTYPE_WRAPPER(tolower)
70 static void
71 str_8bit_insert_replace_char (GString * buffer)
73 g_string_append_c (buffer, replch);
/* String validity check: text is not inspected, the answer is always 1. */
static int
str_8bit_is_valid_string (const char *text)
{
    (void) text;

    return 1;
}
/* Character validity check: both arguments are ignored, the answer is always 1. */
static int
str_8bit_is_valid_char (const char *ch, size_t size)
{
    (void) size;
    (void) ch;

    return 1;
}
/* Advance *text by one byte. No end-of-string check is performed. */
static void
str_8bit_cnext_char (const char **text)
{
    *text += 1;
}
/* Move *text back by one byte. No start-of-string check is performed. */
static void
str_8bit_cprev_char (const char **text)
{
    *text -= 1;
}
/*
 * Step *text forward by one byte unless it already points at the
 * terminating NUL.
 * Returns 1 when the pointer was advanced, 0 when it was left unchanged.
 */
static int
str_8bit_cnext_noncomb_char (const char **text)
{
    const char *cursor = *text;

    if (*cursor == '\0')
        return 0;

    *text = cursor + 1;
    return 1;
}
/*
 * Step *text back by one byte unless it already equals begin.
 * Returns 1 when the pointer was moved, 0 when it was left unchanged.
 */
static int
str_8bit_cprev_noncomb_char (const char **text, const char *begin)
{
    if (*text == begin)
        return 0;

    *text -= 1;
    return 1;
}
/* Result of char_isspace () applied to the first byte of text. */
static int
str_8bit_isspace (const char *text)
{
    const char first = *text;

    return char_isspace (first);
}
/* Result of char_ispunct () applied to the first byte of text. */
static int
str_8bit_ispunct (const char *text)
{
    const char first = *text;

    return char_ispunct (first);
}
/* Result of char_isalnum () applied to the first byte of text. */
static int
str_8bit_isalnum (const char *text)
{
    const char first = *text;

    return char_isalnum (first);
}
/* Result of char_isdigit () applied to the first byte of text. */
static int
str_8bit_isdigit (const char *text)
{
    const char first = *text;

    return char_isdigit (first);
}
/* Result of char_isprint () applied to the first byte of text. */
static int
str_8bit_isprint (const char *text)
{
    const char first = *text;

    return char_isprint (first);
}
/* Combining-mark test: text is not inspected, the answer is always 0. */
static int
str_8bit_iscombiningmark (const char *text)
{
    (void) text;

    return 0;
}
/*
 * Write the upper-case form of the first byte of text to **out.
 *
 * On success *out is advanced by one, *remain is decremented and 1 is
 * returned.  When *remain is 1 or less nothing is written and 0 is
 * returned (presumably the last slot is reserved for a terminator).
 */
static int
str_8bit_toupper (const char *text, char **out, size_t * remain)
{
    char *dest;

    if (*remain <= 1)
        return 0;

    dest = *out;
    *dest = char_toupper (*text);
    *out = dest + 1;
    *remain -= 1;
    return 1;
}
/*
 * Write the lower-case form of the first byte of text to **out.
 *
 * On success *out is advanced by one, *remain is decremented and 1 is
 * returned.  When *remain is 1 or less nothing is written and 0 is
 * returned (presumably the last slot is reserved for a terminator).
 */
static int
str_8bit_tolower (const char *text, char **out, size_t * remain)
{
    char *dest;

    if (*remain <= 1)
        return 0;

    dest = *out;
    *dest = char_tolower (*text);
    *out = dest + 1;
    *remain -= 1;
    return 1;
}
/* Length of text in characters, which here equals its length in bytes. */
static int
str_8bit_length (const char *text)
{
    const size_t bytes = strlen (text);

    return (int) bytes;
}
/*
 * Length of text, limited to size when size is non-negative.
 * A negative size means "no limit" and yields the full string length.
 */
static int
str_8bit_length2 (const char *text, int size)
{
    const size_t full = strlen (text);
    size_t limit;

    if (size < 0)
        return (int) full;

    limit = (size_t) size;
    return (int) (full < limit ? full : limit);
}
198 static gchar *
199 str_8bit_conv_gerror_message (GError *error, const char *def_msg)
201 GIConv conv;
202 gchar *ret;
204 /* glib messages are in UTF-8 charset */
205 conv = str_crt_conv_from ("UTF-8");
207 if (conv == INVALID_CONV)
208 ret = g_strdup (def_msg != NULL ? def_msg : "");
209 else {
210 GString *buf;
212 buf = g_string_new ("");
214 if (str_convert (conv, error->message, buf) != ESTR_FAILURE) {
215 ret = buf->str;
216 g_string_free (buf, FALSE);
217 } else {
218 ret = g_strdup (def_msg != NULL ? def_msg : "");
219 g_string_free (buf, TRUE);
222 str_close_conv (conv);
225 return ret;
228 static estr_t
229 str_8bit_vfs_convert_to (GIConv coder, const char *string,
230 int size, GString * buffer)
232 estr_t result;
234 if (coder == str_cnv_not_convert)
236 g_string_append_len (buffer, string, size);
237 result = ESTR_SUCCESS;
239 else
240 result = str_nconvert (coder, (char *) string, size, buffer);
242 return result;
246 static const char *
247 str_8bit_term_form (const char *text)
249 static char result[BUF_MEDIUM];
250 char *actual;
251 size_t remain;
252 size_t length;
253 size_t pos = 0;
255 actual = result;
256 remain = sizeof (result);
257 length = strlen (text);
259 for (; pos < length && remain > 1; pos++, actual++, remain--)
261 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
264 actual[0] = '\0';
265 return result;
268 static const char *
269 str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
271 static char result[BUF_MEDIUM];
272 char *actual;
273 size_t remain;
274 int ident;
275 size_t length;
276 size_t pos = 0;
278 length = strlen (text);
279 actual = result;
280 remain = sizeof (result);
282 if ((int)length <= width)
284 ident = 0;
285 switch (HIDE_FIT (just_mode))
287 case J_CENTER_LEFT:
288 case J_CENTER:
289 ident = (width - length) / 2;
290 break;
291 case J_RIGHT:
292 ident = width - length;
293 break;
296 if ((int)remain <= ident)
297 goto finally;
298 memset (actual, ' ', ident);
299 actual += ident;
300 remain -= ident;
302 for (; pos < length && remain > 1; pos++, actual++, remain--)
304 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
306 if (width - length - ident > 0)
308 if (remain <= width - length - ident)
309 goto finally;
310 memset (actual, ' ', width - length - ident);
311 actual += width - length - ident;
312 remain -= width - length - ident;
315 else
317 if (IS_FIT (just_mode))
319 for (; pos + 1 <= (gsize)width / 2 && remain > 1;
320 actual++, pos++, remain--)
323 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
326 if (remain <= 1)
327 goto finally;
328 actual[0] = '~';
329 actual++;
330 remain--;
332 pos += length - width + 1;
334 for (; pos < length && remain > 1; pos++, actual++, remain--)
336 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
339 else
341 ident = 0;
342 switch (HIDE_FIT (just_mode))
344 case J_CENTER:
345 ident = (length - width) / 2;
346 break;
347 case J_RIGHT:
348 ident = length - width;
349 break;
352 pos += ident;
353 for (; pos < (gsize)(ident + width) && remain > 1;
354 pos++, actual++, remain--)
357 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
362 finally:
363 actual[0] = '\0';
364 return result;
367 static const char *
368 str_8bit_term_trim (const char *text, int width)
370 static char result[BUF_MEDIUM];
371 size_t remain;
372 char *actual;
373 size_t pos = 0;
374 size_t length;
376 length = strlen (text);
377 actual = result;
378 remain = sizeof (result);
380 if (width < (int)length)
382 if (width <= 3)
384 memset (actual, '.', width);
385 actual += width;
386 remain -= width;
388 else
390 memset (actual, '.', 3);
391 actual += 3;
392 remain -= 3;
394 pos += length - width + 3;
396 for (; pos < length && remain > 1; pos++, actual++, remain--)
398 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
402 else
404 for (; pos < length && remain > 1; pos++, actual++, remain--)
406 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
410 actual[0] = '\0';
411 return result;
/*
 * Width of text in columns, limited to 'length'.
 * Passing (size_t) (-1) as length means "no limit".
 */
static int
str_8bit_term_width2 (const char *text, size_t length)
{
    const size_t full = strlen (text);

    if (length == (size_t) (-1) || full < length)
        return (int) full;

    return (int) length;
}
/* Width of the whole text: str_8bit_term_width2 () without a limit. */
static int
str_8bit_term_width1 (const char *text)
{
    const size_t no_limit = (size_t) (-1);

    return str_8bit_term_width2 (text, no_limit);
}
/* Width of one character: text is not inspected, the answer is always 1. */
static int
str_8bit_term_char_width (const char *text)
{
    (void) text;

    return 1;
}
/*
 * Compute the size of a multi-line message.
 *
 * text     message; lines are separated by '\n'
 * lines    receives the number of lines (number of '\n' plus one)
 * columns  receives the length of the longest line
 *
 * In this single-byte implementation the width of a line is its byte
 * count, so the text is scanned read-only.  No temporary copy is
 * allocated and the input is never modified.
 */
static void
str_8bit_msg_term_size (const char *text, int *lines, int *columns)
{
    const char *line = text;

    *lines = 1;
    *columns = 0;

    for (;;)
    {
        const char *eol = strchr (line, '\n');
        const size_t len = (eol != NULL) ? (size_t) (eol - line) : strlen (line);

        if ((int) len > *columns)
            *columns = (int) len;

        if (eol == NULL)
            break;

        /* continue just behind the newline */
        line = eol + 1;
        (*lines)++;
    }
}
469 static const char *
470 str_8bit_term_substring (const char *text, int start, int width)
472 static char result[BUF_MEDIUM];
473 size_t remain;
474 char *actual;
475 size_t pos = 0;
476 size_t length;
478 actual = result;
479 remain = sizeof (result);
480 length = strlen (text);
482 if (start < (int)length)
484 pos += start;
485 for (; pos < length && width > 0 && remain > 1;
486 pos++, width--, actual++, remain--)
489 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
493 for (; width > 0 && remain > 1; actual++, remain--, width--)
495 actual[0] = ' ';
498 actual[0] = '\0';
499 return result;
502 static const char *
503 str_8bit_trunc (const char *text, int width)
505 static char result[MC_MAXPATHLEN];
506 int remain;
507 char *actual;
508 size_t pos = 0;
509 size_t length;
511 actual = result;
512 remain = sizeof (result);
513 length = strlen (text);
515 if ((int)length > width)
517 for (; pos + 1 <= (gsize)width / 2 && remain > 1; actual++, pos++, remain--)
519 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
522 if (remain <= 1)
523 goto finally;
524 actual[0] = '~';
525 actual++;
526 remain--;
528 pos += length - width + 1;
530 for (; pos < length && remain > 1; pos++, actual++, remain--)
532 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
535 else
537 for (; pos < length && remain > 1; pos++, actual++, remain--)
539 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
543 finally:
544 actual[0] = '\0';
545 return result;
/* Map a character offset to a byte position: returns length converted to int. */
static int
str_8bit_offset_to_pos (const char *text, size_t length)
{
    const int position = (int) length;

    (void) text;
    return position;
}
/* Map a screen column to a byte position: returns pos converted to int. */
static int
str_8bit_column_to_pos (const char *text, size_t pos)
{
    const int position = (int) pos;

    (void) text;
    return position;
}
/*
 * Prepare a search needle.  No preprocessing is done: the needle pointer
 * itself is returned, and case_sen is ignored.
 */
static char *
str_8bit_create_search_needle (const char *needle, int case_sen)
{
    char *same_needle = (char *) needle;

    (void) case_sen;
    return same_needle;
}
/*
 * Release a needle obtained from str_8bit_create_search_needle ().
 * Nothing was allocated there, so this does nothing.
 */
static void
str_8bit_release_search_needle (char *needle, int case_sen)
{
    (void) needle;
    (void) case_sen;
}
576 static const char *
577 str_8bit_search_first (const char *text, const char *search, int case_sen)
579 char *fold_text;
580 char *fold_search;
581 const char *match;
582 size_t offsset;
584 fold_text = (case_sen) ? (char *) text : g_strdown (g_strdup (text));
585 fold_search = (case_sen) ? (char *) search : g_strdown (g_strdup (search));
587 match = g_strstr_len (fold_text, -1, fold_search);
588 if (match != NULL)
590 offsset = match - fold_text;
591 match = text + offsset;
594 if (!case_sen)
596 g_free (fold_text);
597 g_free (fold_search);
600 return match;
603 static const char *
604 str_8bit_search_last (const char *text, const char *search, int case_sen)
606 char *fold_text;
607 char *fold_search;
608 const char *match;
609 size_t offsset;
611 fold_text = (case_sen) ? (char *) text : g_strdown (g_strdup (text));
612 fold_search = (case_sen) ? (char *) search : g_strdown (g_strdup (search));
614 match = g_strrstr_len (fold_text, -1, fold_search);
615 if (match != NULL)
617 offsset = match - fold_text;
618 match = text + offsset;
621 if (!case_sen)
623 g_free (fold_text);
624 g_free (fold_search);
627 return match;
/* Byte-wise comparison of two strings; same result as strcmp (). */
static int
str_8bit_compare (const char *t1, const char *t2)
{
    const int order = strcmp (t1, t2);

    return order;
}
/*
 * Compare only the common-length prefix of two strings: strncmp () over
 * the length of the shorter one.
 */
static int
str_8bit_ncompare (const char *t1, const char *t2)
{
    const size_t len1 = strlen (t1);
    const size_t len2 = strlen (t2);
    const size_t shorter = (len1 < len2) ? len1 : len2;

    return strncmp (t1, t2, shorter);
}
/* Case-insensitive comparison of two strings, delegated to g_strcasecmp (). */
static int
str_8bit_casecmp (const char *t1, const char *t2)
{
    const int order = g_strcasecmp (t1, t2);

    return order;
}
/*
 * Case-insensitive comparison of the common-length prefix of two strings:
 * g_strncasecmp () over the length of the shorter one.
 */
static int
str_8bit_ncasecmp (const char *t1, const char *t2)
{
    const size_t len1 = strlen (t1);
    const size_t len2 = strlen (t2);
    const size_t shorter = (len1 < len2) ? len1 : len2;

    return g_strncasecmp (t1, t2, shorter);
}
/* Number of leading bytes that text and prefix have in common. */
static int
str_8bit_prefix (const char *text, const char *prefix)
{
    int matched = 0;

    while (text[matched] != '\0' && prefix[matched] != '\0'
           && text[matched] == prefix[matched])
        matched++;

    return matched;
}
/*
 * Number of leading bytes that text and prefix have in common when both
 * are compared through char_toupper ().
 */
static int
str_8bit_caseprefix (const char *text, const char *prefix)
{
    int matched = 0;

    while (text[matched] != '\0' && prefix[matched] != '\0'
           && char_toupper (text[matched]) == char_toupper (prefix[matched]))
        matched++;

    return matched;
}
/* Repair an invalid string in place: nothing is done, text is left untouched. */
static void
str_8bit_fix_string (char *text)
{
    (void) text;
}
/*
 * Create a sort key for text.
 *
 * Case-sensitive keys are the text pointer itself (nothing is allocated).
 * Case-insensitive keys are a newly allocated copy lower-cased with
 * g_strdown ().  Release the key with str_8bit_release_key () using the
 * same case_sen value.
 */
static char *
str_8bit_create_key (const char *text, int case_sen)
{
    if (case_sen)
        return (char *) text;

    return g_strdown (g_strdup (text));
}
/*
 * Compare two sort keys: strcmp () for case-sensitive keys, strcoll ()
 * for case-insensitive ones.
 */
static int
str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
{
    return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
}
/*
 * Release a key created by str_8bit_create_key ().  Only case-insensitive
 * keys were allocated there, so only those are freed.
 */
static void
str_8bit_release_key (char *key, int case_sen)
{
    if (case_sen)
        return;

    g_free (key);
}
702 struct str_class
703 str_8bit_init (void)
705 struct str_class result;
707 result.conv_gerror_message = str_8bit_conv_gerror_message;
708 result.vfs_convert_to = str_8bit_vfs_convert_to;
709 result.insert_replace_char = str_8bit_insert_replace_char;
710 result.is_valid_string = str_8bit_is_valid_string;
711 result.is_valid_char = str_8bit_is_valid_char;
712 result.cnext_char = str_8bit_cnext_char;
713 result.cprev_char = str_8bit_cprev_char;
714 result.cnext_char_safe = str_8bit_cnext_char;
715 result.cprev_char_safe = str_8bit_cprev_char;
716 result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
717 result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
718 result.isspace = str_8bit_isspace;
719 result.ispunct = str_8bit_ispunct;
720 result.isalnum = str_8bit_isalnum;
721 result.isdigit = str_8bit_isdigit;
722 result.isprint = str_8bit_isprint;
723 result.iscombiningmark = str_8bit_iscombiningmark;
724 result.toupper = str_8bit_toupper;
725 result.tolower = str_8bit_tolower;
726 result.length = str_8bit_length;
727 result.length2 = str_8bit_length2;
728 result.length_noncomb = str_8bit_length;
729 result.fix_string = str_8bit_fix_string;
730 result.term_form = str_8bit_term_form;
731 result.fit_to_term = str_8bit_fit_to_term;
732 result.term_trim = str_8bit_term_trim;
733 result.term_width2 = str_8bit_term_width2;
734 result.term_width1 = str_8bit_term_width1;
735 result.term_char_width = str_8bit_term_char_width;
736 result.msg_term_size = str_8bit_msg_term_size;
737 result.term_substring = str_8bit_term_substring;
738 result.trunc = str_8bit_trunc;
739 result.offset_to_pos = str_8bit_offset_to_pos;
740 result.column_to_pos = str_8bit_column_to_pos;
741 result.create_search_needle = str_8bit_create_search_needle;
742 result.release_search_needle = str_8bit_release_search_needle;
743 result.search_first = str_8bit_search_first;
744 result.search_last = str_8bit_search_last;
745 result.compare = str_8bit_compare;
746 result.ncompare = str_8bit_ncompare;
747 result.casecmp = str_8bit_casecmp;
748 result.ncasecmp = str_8bit_ncasecmp;
749 result.prefix = str_8bit_prefix;
750 result.caseprefix = str_8bit_caseprefix;
751 result.create_key = str_8bit_create_key;
752 result.create_key_for_filename = str_8bit_create_key;
753 result.key_collate = str_8bit_key_collate;
754 result.release_key = str_8bit_release_key;
756 return result;