Move isempty_(non_)utf8 to utils.c
[smenu.git] / utils.c
blobc3f786fc71f38aff747dd3abc0ab5169d7663511
1 /* ################################################################### */
2 /* Copyright 2015, Pierre Gentile (p.gen.progs@gmail.com) */
3 /* */
4 /* This Source Code Form is subject to the terms of the Mozilla Public */
5 /* License, v. 2.0. If a copy of the MPL was not distributed with this */
6 /* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
7 /* ################################################################### */
9 /* ******************************** */
10 /* Various small utility functions. */
11 /* ******************************** */
13 #include "config.h"
14 #include <stddef.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <limits.h>
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdarg.h>
21 #include <wctype.h>
22 #include "xmalloc.h"
23 #include "wchar.h"
24 #include "list.h"
25 #include "utf8.h"
26 #include "utils.h"
28 /* ******************* */
29 /* Interval functions. */
30 /* ******************* */
32 /* ======================= */
33 /* Creates a new interval. */
34 /* ======================= */
35 interval_t *
36 interval_new(void)
38 return xmalloc(sizeof(interval_t));
41 /* ======================================= */
42 /* Compares 2 intervals as integer couples */
43 /* same return values as for strcmp. */
44 /* ======================================= */
45 int
46 interval_comp(void const *a, void const *b)
48 interval_t const *ia = (interval_t *)a;
49 interval_t const *ib = (interval_t *)b;
51 if (ia->low < ib->low)
52 /* ia: [... */
53 /* ib: [... */
54 return -1;
55 if (ia->low > ib->low)
56 /* ia: [... */
57 /* ib: [... */
58 return 1;
59 if (ia->high < ib->high)
60 /* ia: ...] */
61 /* ib: ...] */
62 return -1;
63 if (ia->high > ib->high)
64 /* ia: ...] */
65 /* ib: ...] */
66 return 1;
68 return 0;
71 /* ================================== */
72 /* Swaps the values of two intervals. */
73 /* ================================== */
74 void
75 interval_swap(void **a, void **b)
77 interval_t *ia = (interval_t *)*a;
78 interval_t *ib = (interval_t *)*b;
79 long tmp;
81 tmp = ia->low;
82 ia->low = ib->low;
83 ib->low = tmp;
85 tmp = ia->high;
86 ia->high = ib->high;
87 ib->high = tmp;
90 /* ====================================================================== */
91 /* Merges the intervals from an interval list in order to get the minimum */
92 /* number of intervals to consider. */
93 /* ====================================================================== */
94 void
95 optimize_an_interval_list(ll_t *list)
97 ll_node_t *node1, *node2;
98 interval_t *data1, *data2;
100 if (!list || list->len < 2)
101 return;
103 /* Step 1: sort the intervals list. */
104 /* """""""""""""""""""""""""""""""" */
105 ll_sort(list, interval_comp, interval_swap);
107 /* Step 2: merge the list by merging the consecutive intervals. */
108 /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
109 node1 = list->head;
110 node2 = node1->next;
112 while (node2)
114 data1 = (interval_t *)(node1->data);
115 data2 = (interval_t *)(node2->data);
117 if (data1->high >= data2->low - 1)
119 /* Interval 1 overlaps interval 2. */
120 /* ''''''''''''''''''''''''''''''' */
121 if (data2->high >= data1->high)
122 data1->high = data2->high;
123 ll_delete(list, node2);
124 free(data2);
125 node2 = node1->next;
127 else
129 /* No overlap. */
130 /* ''''''''''' */
131 node1 = node2;
132 node2 = node2->next;
137 /* ***************** */
138 /* String functions. */
139 /* ***************** */
/* ========================================================================= */
/* Returns a newly allocated string made of s1 followed by every other       */
/* string argument; the argument list must end with a NULL pointer.          */
/* Returns NULL when the total length overflows or reaches INT_MAX.          */
/* Derived from a public domain implementation which can be found here:      */
/* http://openwall.info/wiki/people/solar/software/public-domain-source-code */
/* ========================================================================= */
char *
concat(const char *s1, ...)
{
  va_list     ap;
  const char *part;
  char       *out, *cursor;
  size_t      part_len;
  size_t      first_len = strlen(s1);
  size_t      total     = first_len;
  size_t      copied    = first_len;

  /* Pass 1: add up the lengths, giving up if the sum wraps around. */
  /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
  va_start(ap, s1);
  for (part = va_arg(ap, char *); part != NULL; part = va_arg(ap, char *))
  {
    part_len = strlen(part);
    total += part_len;
    if (total < part_len)
      break;
  }
  va_end(ap);

  /* A non NULL part here means that the loop was left on overflow. */
  /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
  if (part != NULL || total >= INT_MAX)
    return NULL;

  out = (char *)xmalloc(total + 1);

  memcpy(out, s1, first_len);
  cursor = out + first_len;

  /* Pass 2: copy the parts, re-checking the lengths as we go in case */
  /* a string changed between the two passes.                         */
  /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
  va_start(ap, s1);
  for (part = va_arg(ap, char *); part != NULL; part = va_arg(ap, char *))
  {
    part_len = strlen(part);
    copied += part_len;
    if (copied < part_len || copied > total)
      break;

    memcpy(cursor, part, part_len);
    cursor += part_len;
  }
  va_end(ap);

  if (part != NULL || total != copied || cursor != out + copied)
  {
    free(out);
    return NULL;
  }

  *cursor = 0;
  return out;
}
/* ==================================================================== */
/* Tells whether the string str2 is a prefix of the string str1.        */
/* Returns 1 when it is (an empty str2 always is), otherwise 0.         */
/* ==================================================================== */
int
strprefix(char *str1, char *str2)
{
  size_t i = 0;

  /* Walk through both strings as long as they agree. */
  while (str1[i] != '\0' && str1[i] == str2[i])
    i++;

  /* str2 is a prefix only if it was entirely consumed. */
  return str2[i] == '\0';
}
/* ==================================================================== */
/* Removes, in place, the leading characters of str which belong to the */
/* set of characters given in trim_str.                                 */
/* ==================================================================== */
void
ltrim(char *str, const char *trim_str)
{
  size_t skip = strspn(str, trim_str);

  if (skip == 0)
    return;

  /* Shift the remaining characters, terminator included, to the front. */
  memmove(str, str + skip, strlen(str + skip) + 1);
}
/* ==================================================================== */
/* Removes, in place, the trailing characters of str which belong to    */
/* the set of (ASCII) characters given in trim_str.                     */
/* Trimming stops as soon as the string is min characters long, so      */
/* with min > 0 some characters of trim_str may remain at the end;      */
/* 0 is the usual value for min.                                        */
/* ==================================================================== */
void
rtrim(char *str, const char *trim_str, size_t min)
{
  size_t keep;

  for (keep = strlen(str); keep > min; keep--)
  {
    if (strchr(trim_str, str[keep - 1]) == NULL)
      break;

    str[keep - 1] = '\0';
  }
}
/* ==================================================================== */
/* Case insensitive strcmp.                                             */
/* Uses strcasecmp when HAVE_STRCASECMP is defined; otherwise compares  */
/* the lowercased bytes (taken as unsigned char) one by one up to the   */
/* first difference or the end of both strings.                         */
/* Returns a value <0, 0 or >0 in the manner of strcmp.                 */
/* ==================================================================== */
int
my_strcasecmp(const char *str1, const char *str2)
{
#ifdef HAVE_STRCASECMP
  return strcasecmp(str1, str2);
#else
  /* tolower requires values representable as unsigned char. */
  const unsigned char *p1 = (const unsigned char *)str1;
  const unsigned char *p2 = (const unsigned char *)str2;
  int                  diff;

  for (;;)
  {
    diff = tolower(*p1) - tolower(*p2);

    /* Stop on the first difference, or at the common end of the   */
    /* strings: a null difference on a NUL in str1 means that str2 */
    /* ends here too. The test is done before moving forward so    */
    /* that nothing is ever read after a terminator.               */
    if (diff != 0 || *p1 == '\0')
      return diff;

    p1++;
    p2++;
  }
#endif
}
/* ==================================================================== */
/* memmove based strcpy: copies str2, terminator included, into str1    */
/* even when the two areas overlap.                                     */
/* Returns str1, or NULL when either argument is NULL.                  */
/* ==================================================================== */
char *
my_strcpy(char *str1, char *str2)
{
  if (!str1 || !str2)
    return NULL;

  /* memmove hands back its destination, which is str1. */
  return memmove(str1, str2, strlen(str2) + 1);
}
/* ==================================================================== */
/* 7 bits aware version of isprint.                                     */
/* Returns 1 when i lies in the range 0x20..0x7e, otherwise 0.          */
/* ==================================================================== */
int
isprint7(int i)
{
  if (i < 0x20 || i > 0x7e)
    return 0;

  return 1;
}
/* ==================================================================== */
/* 8 bits aware version of isprint.                                     */
/* Only the low 8 bits of i are considered; returns 1 when they lie in  */
/* 0x20..0x7e or in 0xa0..0xff, otherwise 0.                            */
/* ==================================================================== */
int
isprint8(int i)
{
  unsigned char byte = (unsigned char)(i & 0xff);

  /* Upper printable range. */
  if (byte >= 0xa0)
    return 1;

  /* ASCII printable range. */
  return byte >= 0x20 && byte < 0x7f;
}
/* ==================================================================== */
/* Private implementation of wcscasecmp, which is missing in c99.       */
/* Compares s1 and s2 wide character by wide character after towlower  */
/* and returns the difference of the first pair which differs.          */
/* When s1 is exhausted first the result is minus the wide character    */
/* reached in s2 (0 when both strings end together).                    */
/* ==================================================================== */
int
my_wcscasecmp(const wchar_t *s1, const wchar_t *s2)
{
  for (; *s1; s1++, s2++)
  {
    wchar_t lower1 = towlower(*s1);
    wchar_t lower2 = towlower(*s2);

    if (lower1 != lower2)
      return (int)(lower1 - lower2);
  }

  return -*s2;
}
/* ==================================================================== */
/* Returns 1 if s can be converted into an int otherwise returns 0.     */
/* s must be a base 10 number with nothing after it, an empty string    */
/* is rejected.                                                         */
/* errno is reset before the conversion so that an ERANGE left by an    */
/* earlier call cannot make a valid number be refused.                  */
/* ==================================================================== */
int
is_integer(const char * const s)
{
  long int n;
  char    *endptr;

  errno = 0;
  n     = strtol(s, &endptr, 10);

  /* No digit was consumed at all, or something follows the number. */
  if (endptr == s || *endptr != '\0')
    return 0;

  /* The value does not even fit in a long. */
  if (errno == ERANGE)
    return 0;

  /* The value must also fit in an int. */
  return n >= INT_MIN && n <= INT_MAX;
}
/* ==================================================================== */
/* Reverses the characters buf[start] .. buf[end - 1] in place.         */
/* ==================================================================== */
static void
swap_string_parts_reverse(char *buf, size_t start, size_t end)
{
  while (start + 1 < end)
  {
    char tmp = buf[start];

    buf[start]   = buf[end - 1];
    buf[end - 1] = tmp;

    start++;
    end--;
  }
}

/* ==================================================================== */
/* Exchanges the start and end part of the string *s, in place.         */
/* The first part goes from char 0 to first-1.                          */
/* The second part goes from char first to the end of *s.               */
/* Returns 1 on success and 0 when *s is NULL or empty or when first    */
/* is greater than the length of *s.                                    */
/* ==================================================================== */
int
swap_string_parts(char **s, size_t first)
{
  size_t size;

  if (*s == NULL || **s == '\0')
    return 0;

  size = strlen(*s);

  if (first > size)
    return 0;

  /* Rotate without any temporary buffer: reversing each part and then */
  /* the whole string puts the second part in front of the first one.  */
  /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
  swap_string_parts_reverse(*s, 0, first);
  swap_string_parts_reverse(*s, first, size);
  swap_string_parts_reverse(*s, 0, size);

  return 1;
}
/* ==================================================================== */
/* Substitutes, in place, every character c1 of the string s by c2.     */
/* Does nothing when s is NULL.                                         */
/* ==================================================================== */
void
strrep(char *s, const char c1, const char c2)
{
  char *p;

  if (s == NULL)
    return;

  for (p = s; *p; p++)
    if (*p == c1)
      *p = c2;
}
/* ==================================================================== */
/* Allocates and returns a copy of s in which each non printable        */
/* character (according to isprint) is replaced by the 4 characters     */
/* \xHH, HH being its value in uppercase hexadecimal.                   */
/* The result is allocated with xcalloc/xrealloc by this function.      */
/* ==================================================================== */
char *
strprint(char const *s)
{
  static const char hex_digits[] = "0123456789ABCDEF";

  size_t l   = strlen(s);
  char  *out = xcalloc(1, 4 * l + 1); /* Worst case: 4 chars per byte. */
  char  *p   = out;

  for (; *s != '\0'; s++)
  {
    /* isprint is only defined for unsigned char values (and EOF). */
    unsigned char c = (unsigned char)*s;

    if (isprint(c))
      *p++ = (char)c;
    else
    {
      *p++ = '\\';
      *p++ = 'x';
      *p++ = hex_digits[c >> 4];
      *p++ = hex_digits[c & 0x0f];
    }
  }

  /* Terminate explicitly instead of relying on the initial content */
  /* of the buffer.                                                 */
  *p = '\0';

  /* Give the unused room back when some characters were expanded. */
  if (p - out > (ptrdiff_t)l)
    out = xrealloc(out, p - out + 1);

  return out;
}
/* ==================================================================== */
/* Hexadecimal dump of part of a buffer to a file.                      */
/*                                                                      */
/* buf   : buffer to dump.                                              */
/* fp    : file to dump to.                                             */
/* prefix: string to be printed before each line.                       */
/* size  : length of the buffer to consider.                            */
/*                                                                      */
/* Each output line shows the prefix, the offset of its first byte, up  */
/* to 16 bytes in hexadecimal and the same bytes between two '|' with   */
/* '.' standing for the non printable ones.                             */
/* ==================================================================== */
void
hexdump(const char *buf, FILE *fp, const char *prefix, size_t size)
{
  unsigned int  b;     /* Index of the current byte.                 */
  unsigned char d[17]; /* Printable rendering of the current line.   */
  unsigned int  o, mo; /* Current column and widest column reached.  */
  size_t        l;     /* Length of the prefix.                      */

  o = mo = 0;
  l      = strlen(prefix);

  memset(d, '\0', 17);
  for (b = 0; b < size; b++)
  {
    /* isprint is only defined for unsigned char values (and EOF). */
    unsigned char byte = (unsigned char)buf[b];

    d[b % 16] = isprint(byte) ? byte : '.';

    /* Start of a line: print the prefix and the offset. */
    /* """"""""""""""""""""""""""""""""""""""""""""""""" */
    if ((b % 16) == 0)
    {
      o = l + 7;
      if (o > mo)
        mo = o;
      fprintf(fp, "%s: %04x:", prefix, b);
    }

    o += 3;
    if (o > mo)
      mo = o;
    fprintf(fp, " %02x", byte);

    /* End of a full line: print the printable rendering. */
    /* """""""""""""""""""""""""""""""""""""""""""""""""" */
    if ((b % 16) == 15)
    {
      mo = o;
      o  = 0;
      fprintf(fp, " |%s|", d);
      memset(d, '\0', 17);
      fprintf(fp, "\n");
    }
  }

  /* Last, incomplete, line: pad it so that its rendering is aligned */
  /* with the one of the previous full lines, if any.                */
  /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
  if ((b % 16) != 0)
  {
    for (unsigned int i = 0; i < mo - o; i++)
      fprintf(fp, "%c", ' ');

    fprintf(fp, " |%s", d);
    if (mo > o)
      for (unsigned int i = 0; i < 16 - strlen((char *)d); i++)
        fprintf(fp, "%c", ' ');
    fprintf(fp, "%c", '|');
    memset(d, '\0', 17);
    fprintf(fp, "\n");
  }
}
/* ==================================================================== */
/* wcswidth variant which tries to support extended grapheme clusters   */
/* by taking zero width characters into account.                        */
/* Sums the wcwidth of at most n wide characters of s, ignoring a       */
/* U+200D and any character followed by U+200D, U+FE0F or U+20E3.       */
/* Returns 0 for a NULL or empty s and -1 when wcwidth is negative.     */
/* ==================================================================== */
int
my_wcswidth(const wchar_t *s, size_t n)
{
  int    total = 0;
  size_t i;

  if (s == NULL)
    return 0;

  for (i = 0; i < n && s[i] != L'\0'; i++)
  {
    wchar_t current  = s[i];
    wchar_t follower = s[i + 1]; /* At worst the terminator. */
    int     width    = wcwidth(current);

    if (width < 0)
      return -1; /* wcwidth returned -1. */

    /* Do not count zero-width-length glyphs. */
    /* """""""""""""""""""""""""""""""""""""" */
    if (current == L'\x200d' || follower == L'\x200d'
        || follower == L'\xfe0f' || follower == L'\x20e3')
      continue;

    total += width;
  }

  return total;
}
/* ==================================================================== */
/* Get the target index of the number to be inserted in a sorted array. */
/*                                                                      */
/* IN array : an empty or already sorted (ascending) array.             */
/* IN nb    : the number of existing elements in the array.             */
/* IN value : the value to be inserted in the array.                    */
/*                                                                      */
/* RETURN   : -1 if the value is already in the array or the future     */
/*            index of value in the array.                              */
/*            When nb is not positive, nb itself is returned.           */
/* ==================================================================== */
long
get_sorted_array_target_pos(long *array, long nb, long value)
{
  long low  = 0;
  long high = nb;

  if (nb <= 0)
    return nb;

  /* Bisection search on the half-open range [low, high). */
  /* """""""""""""""""""""""""""""""""""""""""""""""""""" */
  while (low < high)
  {
    long mid     = low + (high - low) / 2;
    long current = array[mid];

    if (current == value)
      return -1; /* Value already in array. */

    if (value < current)
      high = mid;
    else
      low = mid + 1;
  }

  return low;
}
/* ==================================================================== */
/* Tells whether the process group of the calling process is the        */
/* foreground process group of the terminal opened via /dev/tty.        */
/* Returns 1 if yes, else 0 (also 0 when /dev/tty cannot be opened).    */
/* ==================================================================== */
int
is_in_foreground_process_group(void)
{
  int in_foreground;
  int tty_fd = open("/dev/tty", O_RDONLY);

  if (tty_fd < 0)
    return 0;

  in_foreground = (tcgetpgrp(tty_fd) == getpgid(0));

  close(tty_fd);

  return in_foreground;
}
/* ==================================================================== */
/* Returns 1 if a string is empty or only made of spaces and tabs,      */
/* otherwise returns 0.                                                 */
/* Non UTF-8 version.                                                   */
/* ==================================================================== */
int
isempty_non_utf8(const unsigned char *s)
{
  /* Skip the leading run of blanks: the string is "empty" when this */
  /* run reaches the terminator.                                     */
  size_t blanks = strspn((const char *)s, " \t");

  return s[blanks] == '\0';
}
/* ==================================================================== */
/* Returns 1 if a string is empty or only made of spaces, otherwise 0.  */
/* UTF-8 version: besides the ASCII space and tab, the UTF-8 encodings  */
/* of U+0085, U+00A0 and of the blank characters listed in the table    */
/* below are accepted as spaces.                                        */
/* ==================================================================== */
int
isempty_utf8(const unsigned char *s)
{
  /* Three bytes long UTF-8 sequences considered as spaces. */
  /* """""""""""""""""""""""""""""""""""""""""""""""""""""" */
  static const unsigned char blanks[][3] = {
    { 0xe1, 0x9a, 0x80 }, /* OGHAM SPACE MARK.           */
    { 0xe1, 0xa0, 0x8e }, /* MONGOLIAN VOWEL SEPARATOR.  */
    { 0xe2, 0x80, 0x80 }, /* EN QUAD.                    */
    { 0xe2, 0x80, 0x81 }, /* EM QUAD.                    */
    { 0xe2, 0x80, 0x82 }, /* EN SPACE.                   */
    { 0xe2, 0x80, 0x83 }, /* EM SPACE.                   */
    { 0xe2, 0x80, 0x84 }, /* THREE-PER-EM SPACE.         */
    { 0xe2, 0x80, 0x85 }, /* FOUR-PER-EM SPACE.          */
    { 0xe2, 0x80, 0x86 }, /* SIX-PER-EM SPACE.           */
    { 0xe2, 0x80, 0x87 }, /* FIGURE SPACE.               */
    { 0xe2, 0x80, 0x88 }, /* PUNCTUATION SPACE.          */
    { 0xe2, 0x80, 0x89 }, /* THIN SPACE.                 */
    { 0xe2, 0x80, 0x8a }, /* HAIR SPACE.                 */
    { 0xe2, 0x80, 0xa8 }, /* LINE SEPARATOR.             */
    { 0xe2, 0x80, 0xa9 }, /* PARAGRAPH SEPARATOR.        */
    { 0xe2, 0x80, 0xaf }, /* NARROW NO-BREAK SPACE.      */
    { 0xe2, 0x81, 0x9f }, /* MEDIUM MATHEMATICAL SPACE.  */
    { 0xe3, 0x80, 0x80 }  /* IDEOGRAPHIC SPACE.          */
  };
  const size_t blanks_nb = sizeof blanks / sizeof blanks[0];
  size_t       i;

  while (*s != '\0')
  {
    if (*s == ' ' || *s == '\t') /* Normal ASCII spaces. */
    {
      s++;
      continue;
    }

    if (*s < 0xc2) /* Not an UTF-8 space -> return FALSE. */
      return 0;

    /* A lead byte immediately followed by the end of the string */
    /* cannot be a space.                                        */
    if (s[1] == '\0')
      return 0;

    /* Two bytes long spaces: unnamed control character (U+0085) */
    /* or NO-BREAK SPACE (U+00A0).                               */
    if (s[0] == 0xc2 && (s[1] == 0x85 || s[1] == 0xa0))
    {
      s += 2;
      continue;
    }

    /* Everything else must be one of the three bytes long spaces. */
    if (s[2] == '\0')
      return 0;

    for (i = 0; i < blanks_nb; i++)
      if (memcmp(s, blanks[i], 3) == 0)
        break;

    if (i == blanks_nb)
      return 0;

    s += 3;
  }

  return 1;
}