iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / util / conv.c
blob5ff830c054ad8251f4ae5c93e93129926a576a68
1 /** Conversion functions
2 * @file */
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
8 #include <ctype.h>
9 #include <errno.h>
10 #ifdef HAVE_LIMITS_H
11 #include <limits.h>
12 #endif
13 #include <stdlib.h>
14 #include <string.h>
16 #include "elinks.h"
18 #include "intl/charsets.h" /* NBSP_CHAR */
19 #include "util/conv.h"
20 #include "util/error.h"
21 #include "util/string.h"
22 #include "util/time.h"
26 /** This function takes string @a s and stores the @a number (of a
27 * result width @a width) in string format there, starting at position
28 * [*@a slen]. If the number would take more space than @a width, it
29 * is truncated and only the _last_ digits of it are inserted to the
30 * string. If the number takes less space than @a width, it is padded
31 * by @a fillchar from left.
32 * @a base defined which base should be used (10, 16, 8, 2, ...)
33 * @a upper selects either hexa uppercased chars or lowercased chars.
35 * A NUL char is always added at the end of the string. @a s must point
36 * to a sufficiently large memory space, at least *@a slen + @a width + 1.
38 * Examples:
40 * @code
41 * elinks_ulongcat(s, NULL, 12345, 4, 0, 10, 0) : s = "2345"
42 * elinks_ulongcat(s, NULL, 255, 4, '*', 16, 1) : s = "**FF"
43 * elinks_ulongcat(s, NULL, 123, 5, '0', 10, 0) : s = "00123"
44 * @endcode
46 * Note that this function exists to provide a fast and efficient, however
47 * still quite powerful alternative to sprintf(). It is optimized for speed and
48 * is *MUCH* faster than sprintf(). If you can use it, use it ;-). But do not
49 * get too enthusiastic, do not use it in cases where it would break i18n.
51 * @returns 0 if OK or width needed for the whole number to fit there,
52 * if it had to be truncated. A negative value signs an error. */
53 NONSTATIC_INLINE int
54 elinks_ulongcat(unsigned char *s, unsigned int *slen,
55 unsigned long long number, unsigned int width,
56 unsigned char fillchar, unsigned int base,
57 unsigned int upper)
59 static const unsigned char unum[]= "0123456789ABCDEF";
60 static const unsigned char lnum[]= "0123456789abcdef";
61 const unsigned char *to_num = (upper ? unum : lnum);
62 unsigned int start = slen ? *slen : 0;
63 unsigned int nlen = 1; /* '0' is one char, we can't have less. */
64 unsigned int pos = start; /* starting position of the number */
65 unsigned long long q = number;
66 int ret = 0;
68 if (width < 1 || !s || base < 2 || base > 16) return -1;
70 /* Count the length of the number in chars. */
71 while (q > (base - 1)) {
72 nlen++;
73 q /= base;
76 /* If max. width attained, truncate. */
77 if (nlen > width) {
78 ret = nlen;
79 nlen = width;
82 if (slen) *slen += nlen;
84 /* Fill left space with fillchar. */
85 if (fillchar) {
86 /* ie. width = 4 nlen = 2 -> pad = 2 */
87 unsigned int pad = width - nlen;
89 if (pad > 0) {
90 /* Relocate the start of number. */
91 if (slen) *slen += pad;
92 pos += pad;
94 /* Pad. */
95 while (pad > 0) s[--pad + start] = fillchar;
99 s[pos + nlen] = '\0';
101 /* Now write number starting from end. */
102 while (nlen > 0) {
103 s[--nlen + pos] = to_num[(number % base)];
104 number /= base;
107 return ret;
110 /** Similar to elinks_ulongcat() but for @c long number. */
111 NONSTATIC_INLINE int
112 elinks_longcat(unsigned char *s, unsigned int *slen,
113 long long number, unsigned int width,
114 unsigned char fillchar, unsigned int base,
115 unsigned int upper)
117 unsigned char *p = s;
119 if (number < 0 && width > 0) {
120 if (slen) p[(*slen)++] = '-';
121 else *(p++) = '-';
122 number = -number;
123 width--;
126 return elinks_ulongcat(p, slen, number, width, fillchar, base, upper);
130 /** @relates string */
131 struct string *
132 add_long_to_string(struct string *string, long long number)
134 unsigned char buffer[64];
135 int length = 0;
136 int width;
138 assert(string);
139 if_assert_failed { return NULL; }
141 width = longcat(buffer, &length, number, sizeof(buffer) - 1, 0);
142 if (width < 0 || !length) return NULL;
144 return add_bytes_to_string(string, buffer, length);
147 /** @relates string */
148 struct string *
149 add_knum_to_string(struct string *string, long long num)
151 int ret;
152 unsigned char t[64];
153 int tlen = 0;
155 if (num && (num / (1024 * 1024)) * (1024 * 1024) == num) {
156 ret = longcat(&t, &tlen, num / (1024 * 1024), sizeof(t) - 2, 0);
157 t[tlen++] = 'M';
158 t[tlen] = '\0';
159 } else if (num && (num / 1024) * 1024 == num) {
160 ret = longcat(&t, &tlen, num / 1024, sizeof(t) - 2, 0);
161 t[tlen++] = 'k';
162 t[tlen] = '\0';
163 } else {
164 ret = longcat(&t, &tlen, num, sizeof(t) - 1, 0);
167 if (ret < 0 || !tlen) return NULL;
169 add_bytes_to_string(string, t, tlen);
171 return string;
/** Append @a xnum to @a string as a byte count with a binary prefix.
 *
 * Values of at least 1024 * 1024 are written as "N.D MiB", values of at
 * least 1024 as "N.D KiB", where D is a single (truncated, not rounded)
 * decimal digit; smaller values are written as "N B".
 *
 * @returns @a string.
 *
 * @relates string */
struct string *
add_xnum_to_string(struct string *string, long long xnum)
{
	unsigned char suff[3] = "\0i";
	int d = -1;	/* First decimal digit, or -1 when not scaled. */

	/* The decimal digit is computed from the remainder only. This
	 * gives the same digit as (xnum * 10 / unit) % 10 but cannot
	 * overflow long long for very large @a xnum. */
	if (xnum >= 1024 * 1024) {
		/* Mebi (Mi), 2^20 */
		suff[0] = 'M';
		d = (int) ((xnum % (1024 * 1024)) * 10 / (1024 * 1024));
		xnum /= 1024 * 1024;
	} else if (xnum >= 1024) {
		/* Kibi (Ki), 2^10 */
		suff[0] = 'K';
		d = (int) ((xnum % 1024) * 10 / 1024);
		xnum /= 1024;
	}

	add_long_to_string(string, xnum);

	if (d != -1) {
		add_char_to_string(string, '.');
		add_long_to_string(string, d);
	}
	add_char_to_string(string, ' ');

	if (suff[0]) add_to_string(string, suff);
	add_char_to_string(string, 'B');
	return string;
}
/** Append a duration given in @a seconds to @a string.
 *
 * The format is "[Dd ][H:]MM:SS": the day part appears for a day or
 * more, the hour part for an hour or more. Negative durations are
 * treated as zero.
 *
 * @returns @a string.
 *
 * @relates string */
struct string *
add_duration_to_string(struct string *string, long seconds)
{
	const long per_hour = 3600;
	const long per_day = 24 * 3600;
	unsigned char text[64];
	int len = 0;
	long left = (seconds > 0) ? seconds : 0;

	if (left >= per_day) {
		/* Whole days, followed by "d ". */
		ulongcat(text, &len, (left / per_day), 5, 0);
		text[len++] = 'd';
		text[len++] = ' ';
	}

	if (left < per_hour) {
		/* Under an hour: minutes only. */
		ulongcat(text, &len, (left / 60), 2, 0);
	} else {
		/* Drop the days already printed, then hours and
		 * zero-padded minutes. */
		left %= per_day;
		ulongcat(text, &len, (left / per_hour), 4, 0);
		text[len++] = ':';
		ulongcat(text, &len, ((left / 60) % 60), 2, '0');
	}

	/* Zero-padded seconds; this last call also NUL-terminates. */
	text[len++] = ':';
	ulongcat(text, &len, (left % 60), 2, '0');

	add_to_string(string, text);
	return string;
}
242 /** @relates string */
243 struct string *
244 add_timeval_to_string(struct string *string, timeval_T *timeval)
246 return add_duration_to_string(string, timeval_to_seconds(timeval));
#ifdef HAVE_STRFTIME
/** Append a date, formatted by strftime() according to @a fmt, to
 * @a string.
 *
 * @a date points to the time to format; when it is NULL the current
 * time is used. The time is broken down with localtime().
 *
 * @returns the result of add_to_string(), or NULL if the time could not
 * be converted or strftime() produced no output. */
struct string *
add_date_to_string(struct string *string, const unsigned char *fmt,
		   const time_t *date)
{
	unsigned char buffer[MAX_STR_LEN];
	time_t when_time = date ? *date : time(NULL);
	struct tm *when_local = localtime(&when_time);

	/* localtime() returns NULL when the time cannot be represented;
	 * passing that on to strftime() would be undefined behaviour. */
	if (!when_local)
		return NULL;

	/* strftime() returns 0 when the result (with its NUL) does not
	 * fit; the buffer contents are then unspecified. */
	if (strftime(buffer, sizeof(buffer), fmt, when_local) == 0)
		return NULL;

	return add_to_string(string, buffer);
}
#endif
265 /* Encoders and string changers */
267 struct string *
268 add_string_replace(struct string *string, unsigned char *src, int len,
269 unsigned char replaceable, unsigned char replacement)
271 int oldlength = string->length;
273 if (!add_bytes_to_string(string, src, len))
274 return NULL;
276 for (src = string->source + oldlength; len; len--, src++)
277 if (*src == replaceable)
278 *src = replacement;
280 return string;
283 struct string *
284 add_html_to_string(struct string *string, const unsigned char *src, int len)
286 for (; len; len--, src++) {
287 if (*src < 0x20
288 || *src == '<' || *src == '>' || *src == '&'
289 || *src == '\"' || *src == '\'') {
290 int rollback_length = string->length;
292 if (!add_bytes_to_string(string, "&#", 2)
293 || !add_long_to_string(string, (long long)*src)
294 || !add_char_to_string(string, ';')) {
295 string->length = rollback_length;
296 string->source[rollback_length] = '\0';
297 return NULL;
299 } else {
300 if (!add_char_to_string(string, *src))
301 return NULL;
305 return string;
308 struct string *
309 add_cp_html_to_string(struct string *string, int src_codepage,
310 const unsigned char *src, int len)
312 const unsigned char *const end = src + len;
313 unicode_val_T unicode;
315 for (;;) {
316 unicode = cp_to_unicode(src_codepage,
317 (unsigned char **) &src, end);
318 if (unicode == UCS_NO_CHAR)
319 break;
321 if (unicode < 0x20 || unicode >= 0x7F
322 || unicode == '<' || unicode == '>' || unicode == '&'
323 || unicode == '\"' || unicode == '\'') {
324 int rollback_length = string->length;
326 if (!add_bytes_to_string(string, "&#", 2)
327 || !add_long_to_string(string, unicode)
328 || !add_char_to_string(string, ';')) {
329 string->length = rollback_length;
330 string->source[rollback_length] = '\0';
331 return NULL;
333 } else {
334 if (!add_char_to_string(string, unicode))
335 return NULL;
339 return string;
/** Append @a len bytes of @a src to @a string, putting a backslash in
 * front of every backslash and every byte for which isquote() is true.
 *
 * @returns @a string. */
/* TODO Optimize later --pasky */
struct string *
add_quoted_to_string(struct string *string, const unsigned char *src, int len)
{
	while (len) {
		const unsigned char ch = *src;

		if (isquote(ch) || ch == '\\')
			add_char_to_string(string, '\\');
		add_char_to_string(string, ch);

		len--;
		src++;
	}

	return string;
}
/** Append @a len bytes of @a src to @a string wrapped in single
 * quotes.
 *
 * Each single quote inside @a src is written as the four characters
 * '\'' (close the quotes, an escaped quote, open them again).
 *
 * @returns @a string. */
struct string *
add_shell_quoted_to_string(struct string *string, unsigned char *src, int len)
{
	add_char_to_string(string, '\'');

	while (len) {
		const unsigned char ch = *src;

		if (ch != '\'')
			add_char_to_string(string, ch);
		else
			add_to_string(string, "'\\''");

		len--;
		++src;
	}

	add_char_to_string(string, '\'');

	return string;
}
/** Append @a cmdlen bytes of @a cmd to @a string, percent-encoding
 * every byte that is not considered safe.
 *
 * A byte is copied as it is when is_safe_in_shell() accepts it, or when
 * it is a '-' directly following a byte that was accepted. Any other
 * byte is written as '%' followed by two hex digits produced by hx().
 *
 * @returns @a string. */
struct string *
add_shell_safe_to_string(struct string *string, unsigned char *cmd, int cmdlen)
{
	int last_was_safe = 0;

	while (cmdlen) {
		const unsigned char ch = *cmd;
		int copy_raw;

		if (ch == '-' && last_was_safe) {
			/* Allowed only after a safe byte; the flag is
			 * deliberately left as it was. */
			copy_raw = 1;
		} else {
			last_was_safe = is_safe_in_shell(ch);
			copy_raw = last_was_safe;
		}

		if (copy_raw) {
			add_char_to_string(string, ch);
		} else {
			/* XXX: Not all programs we might exec are capable of
			 * decoding these. For some, we should just report
			 * an error rather than exec with an encoded string. */
			add_char_to_string(string, '%');
			add_char_to_string(string, hx((ch & 0xf0) >> 4));
			add_char_to_string(string, hx(ch & 0x0f));
		}

		cmdlen--;
		cmd++;
	}

	return string;
}
/** Parse a decimal number with an optional 'K' or 'M' suffix.
 *
 * The number at @a str is parsed with strtol(). If the character that
 * follows it is 'K' or 'M' (in either case), the value is multiplied by
 * 1024 or 1024 * 1024 respectively, clamped to the range
 * -INT_MAX..INT_MAX, and the suffix is consumed.
 *
 * @a end, when not NULL, receives a pointer to the first character
 * after the parsed number and suffix. It may be NULL if the caller
 * does not need that position.
 *
 * @returns the parsed value, or 0 if strtol() set errno. */
long
strtolx(unsigned char *str, unsigned char **end)
{
	char *tail = NULL;
	long num;
	unsigned char postfix;

	errno = 0;
	num = strtol((const char *) str, &tail, 10);
	if (end) *end = (unsigned char *) tail;
	if (errno) return 0;
	if (!tail) return num;

	postfix = c_toupper((unsigned char) *tail);
	if (postfix == 'K') {
		if (end) (*end)++;
		if (num < -INT_MAX / 1024) return -INT_MAX;
		if (num > INT_MAX / 1024) return INT_MAX;
		return num * 1024;
	}

	if (postfix == 'M') {
		if (end) (*end)++;
		if (num < -INT_MAX / (1024 * 1024)) return -INT_MAX;
		if (num > INT_MAX / (1024 * 1024)) return INT_MAX;
		return num * (1024 * 1024);
	}

	return num;
}
/** Convert a three-letter English month abbreviation to its number.
 *
 * Only the first three bytes of @a str are examined (all three are
 * always read), and each is compared after setting bit 0x20, which
 * makes the match case-insensitive for letters.
 *
 * @returns 0 for "jan" up to 11 for "dec", or -1 if @a str does not
 * start with a month abbreviation. */
int
month2num(const unsigned char *str)
{
	static const unsigned char names[12][4] = {
		"jan", "feb", "mar", "apr", "may", "jun",
		"jul", "aug", "sep", "oct", "nov", "dec",
	};
	const unsigned char first = str[0] | 32;
	const unsigned char second = str[1] | 32;
	const unsigned char third = str[2] | 32;
	int month;

	for (month = 0; month < 12; month++) {
		if (names[month][0] == first
		    && names[month][1] == second
		    && names[month][2] == third)
			return month;
	}

	return -1;
}
470 /** This function drops control chars, nbsp char and limit the number
471 * of consecutive space chars to one. It modifies its argument. */
472 void
473 clr_spaces(unsigned char *str)
475 unsigned char *s;
476 unsigned char *dest = str;
478 assert(str);
480 for (s = str; *s; s++)
481 if (*s < ' ' || *s == NBSP_CHAR) *s = ' ';
483 for (s = str; *s; s++) {
484 if (*s == ' ' && (dest == str || s[1] == ' ' || !s[1]))
485 continue;
487 *dest++ = *s;
490 *dest = '\0';
493 /** Replace invalid chars in @a title with ' ' and trim all starting/ending
494 * spaces.
496 * update_bookmark() assumes this function does not switch translation
497 * tables. */
498 void
499 sanitize_title(unsigned char *title)
501 int len = strlen(title);
503 if (!len) return;
505 while (len--) {
506 if (title[len] < ' ' || title[len] == NBSP_CHAR)
507 title[len] = ' ';
509 trim_chars(title, ' ', NULL);
/** Check @a url for invalid chars and trim it.
 *
 * @returns 0 if @a url contains a char below ' ' (it is then left
 * unmodified), 1 if it is ok. A non-empty valid @a url has its
 * starting/ending spaces trimmed. */
int
sanitize_url(unsigned char *url)
{
	const unsigned char *scan;

	if (*url == '\0')
		return 1;

	for (scan = url; *scan != '\0'; scan++) {
		if (*scan < ' ')
			return 0;
	}

	trim_chars(url, ' ', NULL);
	return 1;
}
/** Locale-independent tolower(): maps 'A'..'Z' to 'a'..'z' and returns
 * any other value of @a c unchanged. */
int c_tolower(int c) {
	static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
	int i;

	/* The letters are matched one by one, so nothing is assumed about
	 * how the execution character set orders them. */
	for (i = 0; i < 26; i++) {
		if (c == upper[i])
			return lower[i];
	}

	return c;
}
/** Locale-independent toupper(): maps 'a'..'z' to 'A'..'Z' and returns
 * any other value of @a c unchanged. */
int c_toupper(int c) {
	static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
	static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	int i;

	/* The letters are matched one by one, so nothing is assumed about
	 * how the execution character set orders them. */
	for (i = 0; i < 26; i++) {
		if (c == lower[i])
			return upper[i];
	}

	return c;
}
/** Locale-independent isupper(): returns 1 if @a c is one of
 * 'A'..'Z', 0 otherwise. */
int c_isupper (int c)
{
	static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	int i;

	for (i = 0; i < 26; i++) {
		if (c == upper[i])
			return 1;
	}

	return 0;
}
/** Locale-independent islower(): returns 1 if @a c is one of
 * 'a'..'z', 0 otherwise. */
int c_islower (int c)
{
	static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
	int i;

	for (i = 0; i < 26; i++) {
		if (c == lower[i])
			return 1;
	}

	return 0;
}