1 /** Conversion functions
18 #include "intl/charsets.h" /* NBSP_CHAR */
19 #include "util/conv.h"
20 #include "util/error.h"
21 #include "util/string.h"
22 #include "util/time.h"
26 /** This function takes string @a s and stores the @a number (of a
27 * result width @a width) in string format there, starting at position
28 * [*@a slen]. If the number would take more space than @a width, it
29 * is truncated and only the _last_ digits of it are inserted to the
30 * string. If the number takes less space than @a width, it is padded
31 * by @a fillchar from left.
32 * @a base defines which base should be used (10, 16, 8, 2, ...)
33 * @a upper selects uppercase (non-zero) or lowercase (zero) hexadecimal digits.
35 * A NUL char is always added at the end of the string. @a s must point
36 * to a sufficiently large memory space, at least *@a slen + @a width + 1.
41 * elinks_ulongcat(s, NULL, 12345, 4, 0, 10, 0) : s = "2345"
42 * elinks_ulongcat(s, NULL, 255, 4, '*', 16, 1) : s = "**FF"
43 * elinks_ulongcat(s, NULL, 123, 5, '0', 10, 0) : s = "00123"
46 * Note that this function exists to provide a fast and efficient, however
47 * still quite powerful alternative to sprintf(). It is optimized for speed and
48 * is *MUCH* faster than sprintf(). If you can use it, use it ;-). But do not
49 * get too enthusiastic, do not use it in cases where it would break i18n.
51 * @returns 0 if OK or width needed for the whole number to fit there,
52 * if it had to be truncated. A negative value signals an error. */
54 elinks_ulongcat(unsigned char *s
, unsigned int *slen
,
55 unsigned long long number
, unsigned int width
,
56 unsigned char fillchar
, unsigned int base
,
59 static const unsigned char unum
[]= "0123456789ABCDEF";
60 static const unsigned char lnum
[]= "0123456789abcdef";
61 const unsigned char *to_num
= (upper
? unum
: lnum
);
62 unsigned int start
= slen
? *slen
: 0;
63 unsigned int nlen
= 1; /* '0' is one char, we can't have less. */
64 unsigned int pos
= start
; /* starting position of the number */
65 unsigned long long q
= number
;
68 if (width
< 1 || !s
|| base
< 2 || base
> 16) return -1;
70 /* Count the length of the number in chars. */
71 while (q
> (base
- 1)) {
76 /* If max. width attained, truncate. */
82 if (slen
) *slen
+= nlen
;
84 /* Fill left space with fillchar. */
86 /* ie. width = 4 nlen = 2 -> pad = 2 */
87 unsigned int pad
= width
- nlen
;
90 /* Relocate the start of number. */
91 if (slen
) *slen
+= pad
;
95 while (pad
> 0) s
[--pad
+ start
] = fillchar
;
101 /* Now write number starting from end. */
103 s
[--nlen
+ pos
] = to_num
[(number
% base
)];
110 /** Similar to elinks_ulongcat() but for a signed (long long) number. */
112 elinks_longcat(unsigned char *s
, unsigned int *slen
,
113 long long number
, unsigned int width
,
114 unsigned char fillchar
, unsigned int base
,
117 unsigned char *p
= s
;
119 if (number
< 0 && width
> 0) {
120 if (slen
) p
[(*slen
)++] = '-';
126 return elinks_ulongcat(p
, slen
, number
, width
, fillchar
, base
, upper
);
130 /** @relates string */
132 add_long_to_string(struct string
*string
, long long number
)
134 unsigned char buffer
[64];
139 if_assert_failed
{ return NULL
; }
141 width
= longcat(buffer
, &length
, number
, sizeof(buffer
) - 1, 0);
142 if (width
< 0 || !length
) return NULL
;
144 return add_bytes_to_string(string
, buffer
, length
);
147 /** @relates string */
149 add_knum_to_string(struct string
*string
, long long num
)
155 if (num
&& (num
/ (1024 * 1024)) * (1024 * 1024) == num
) {
156 ret
= longcat(&t
, &tlen
, num
/ (1024 * 1024), sizeof(t
) - 2, 0);
159 } else if (num
&& (num
/ 1024) * 1024 == num
) {
160 ret
= longcat(&t
, &tlen
, num
/ 1024, sizeof(t
) - 2, 0);
164 ret
= longcat(&t
, &tlen
, num
, sizeof(t
) - 1, 0);
167 if (ret
< 0 || !tlen
) return NULL
;
169 add_bytes_to_string(string
, t
, tlen
);
174 /** @relates string */
176 add_xnum_to_string(struct string
*string
, long long xnum
)
178 unsigned char suff
[3] = "\0i";
181 /* XXX: I don't completely like the computation of d here. --pasky */
182 /* Mebi (Mi), 2^20 */
183 if (xnum
>= 1024 * 1024) {
185 d
= (xnum
* (int) 10 / (int) ((int) (1024 * 1024))) % 10;
187 /* Kibi (Ki), 2^10 */
188 } else if (xnum
>= 1024) {
190 d
= (xnum
* (int) 10 / (int) 1024) % 10;
194 add_long_to_string(string
, xnum
);
197 add_char_to_string(string
, '.');
198 add_long_to_string(string
, d
);
200 add_char_to_string(string
, ' ');
202 if (suff
[0]) add_to_string(string
, suff
);
203 add_char_to_string(string
, 'B');
207 /** @relates string */
209 add_duration_to_string(struct string
*string
, long seconds
)
214 if (seconds
< 0) seconds
= 0;
217 if (seconds
>= (24 * 3600)) {
218 ulongcat(q
, &qlen
, (seconds
/ (24 * 3600)), 5, 0);
223 /* Hours and minutes */
224 if (seconds
>= 3600) {
225 seconds
%= (24 * 3600);
226 ulongcat(q
, &qlen
, (seconds
/ 3600), 4, 0);
228 ulongcat(q
, &qlen
, ((seconds
/ 60) % 60), 2, '0');
231 ulongcat(q
, &qlen
, (seconds
/ 60), 2, 0);
236 ulongcat(q
, &qlen
, (seconds
% 60), 2, '0');
238 add_to_string(string
, q
);
242 /** @relates string */
244 add_timeval_to_string(struct string
*string
, timeval_T
*timeval
)
246 return add_duration_to_string(string
, timeval_to_seconds(timeval
));
251 add_date_to_string(struct string
*string
, const unsigned char *fmt
,
254 unsigned char buffer
[MAX_STR_LEN
];
255 time_t when_time
= date
? *date
: time(NULL
);
256 struct tm
*when_local
= localtime(&when_time
);
258 if (strftime(buffer
, sizeof(buffer
), fmt
, when_local
) <= 0)
261 return add_to_string(string
, buffer
);
265 /* Encoders and string changers */
268 add_string_replace(struct string
*string
, unsigned char *src
, int len
,
269 unsigned char replaceable
, unsigned char replacement
)
271 int oldlength
= string
->length
;
273 if (!add_bytes_to_string(string
, src
, len
))
276 for (src
= string
->source
+ oldlength
; len
; len
--, src
++)
277 if (*src
== replaceable
)
284 add_html_to_string(struct string
*string
, const unsigned char *src
, int len
)
286 for (; len
; len
--, src
++) {
288 || *src
== '<' || *src
== '>' || *src
== '&'
289 || *src
== '\"' || *src
== '\'') {
290 int rollback_length
= string
->length
;
292 if (!add_bytes_to_string(string
, "&#", 2)
293 || !add_long_to_string(string
, (long long)*src
)
294 || !add_char_to_string(string
, ';')) {
295 string
->length
= rollback_length
;
296 string
->source
[rollback_length
] = '\0';
300 if (!add_char_to_string(string
, *src
))
309 add_cp_html_to_string(struct string
*string
, int src_codepage
,
310 const unsigned char *src
, int len
)
312 const unsigned char *const end
= src
+ len
;
313 unicode_val_T unicode
;
316 unicode
= cp_to_unicode(src_codepage
,
317 (unsigned char **) &src
, end
);
318 if (unicode
== UCS_NO_CHAR
)
321 if (unicode
< 0x20 || unicode
>= 0x7F
322 || unicode
== '<' || unicode
== '>' || unicode
== '&'
323 || unicode
== '\"' || unicode
== '\'') {
324 int rollback_length
= string
->length
;
326 if (!add_bytes_to_string(string
, "&#", 2)
327 || !add_long_to_string(string
, unicode
)
328 || !add_char_to_string(string
, ';')) {
329 string
->length
= rollback_length
;
330 string
->source
[rollback_length
] = '\0';
334 if (!add_char_to_string(string
, unicode
))
342 /* TODO Optimize later --pasky */
344 add_quoted_to_string(struct string
*string
, const unsigned char *src
, int len
)
346 for (; len
; len
--, src
++) {
347 if (isquote(*src
) || *src
== '\\')
348 add_char_to_string(string
, '\\');
349 add_char_to_string(string
, *src
);
356 add_shell_quoted_to_string(struct string
*string
, unsigned char *src
, int len
)
358 add_char_to_string(string
, '\'');
359 for (; len
; len
--, ++src
)
361 add_to_string(string
, "'\\''");
363 add_char_to_string(string
, *src
);
364 add_char_to_string(string
, '\'');
370 add_shell_safe_to_string(struct string
*string
, unsigned char *cmd
, int cmdlen
)
374 for (; cmdlen
; cmdlen
--, cmd
++) {
375 if ((*cmd
== '-' && prev_safe
) ||
376 (prev_safe
= is_safe_in_shell(*cmd
))) {
377 add_char_to_string(string
, *cmd
);
379 /* XXX: Not all programs we might exec are capable of
380 * decoding these. For some, we should just report
381 * an error rather than exec with an encoded string. */
382 add_char_to_string(string
, '%');
383 add_char_to_string(string
, hx((*cmd
& 0xf0) >> 4));
384 add_char_to_string(string
, hx(*cmd
& 0x0f));
393 strtolx(unsigned char *str
, unsigned char **end
)
396 unsigned char postfix
;
399 num
= strtol(str
, (char **) end
, 10);
401 if (!*end
) return num
;
403 postfix
= c_toupper(**end
);
404 if (postfix
== 'K') {
406 if (num
< -INT_MAX
/ 1024) return -INT_MAX
;
407 if (num
> INT_MAX
/ 1024) return INT_MAX
;
411 if (postfix
== 'M') {
413 if (num
< -INT_MAX
/ (1024 * 1024)) return -INT_MAX
;
414 if (num
> INT_MAX
/ (1024 * 1024)) return INT_MAX
;
415 return num
* (1024 * 1024);
422 month2num(const unsigned char *str
)
424 unsigned char month
[3] = { str
[0]|32, str
[1]|32, str
[2]|32 };
427 case 'j': /* jan, jun, jul */
428 if (month
[1] == 'a') {
429 if (month
[2] == 'n') return 0; /* jan */
432 if (month
[1] == 'u') {
433 if (month
[2] == 'n') return 5; /* jun */
434 if (month
[2] == 'l') return 6; /* jul */
437 case 'm': /* mar, may */
438 if (month
[1] == 'a') {
439 if (month
[2] == 'r') return 2; /* mar */
440 if (month
[2] == 'y') return 4; /* may */
443 case 'a': /* apr, aug */
444 if (month
[1] == 'p') {
445 if (month
[2] == 'r') return 3; /* apr */
448 if (month
[1] == 'u' && month
[2] == 'g') return 7; /* aug */
451 if (month
[1] == 'e' && month
[2] == 'p') return 8; /* sep */
454 if (month
[1] == 'c' && month
[2] == 't') return 9; /* oct */
457 if (month
[1] == 'o' && month
[2] == 'v') return 10; /* nov */
460 if (month
[1] == 'e' && month
[2] == 'c') return 11; /* dec */
463 if (month
[1] == 'e' && month
[2] == 'b') return 1; /* feb */
470 /** This function drops control chars, nbsp char and limits the number
471 * of consecutive space chars to one. It modifies its argument. */
473 clr_spaces(unsigned char *str
)
476 unsigned char *dest
= str
;
480 for (s
= str
; *s
; s
++)
481 if (*s
< ' ' || *s
== NBSP_CHAR
) *s
= ' ';
483 for (s
= str
; *s
; s
++) {
484 if (*s
== ' ' && (dest
== str
|| s
[1] == ' ' || !s
[1]))
493 /** Replace invalid chars in @a title with ' ' and trim all starting/ending
496 * update_bookmark() assumes this function does not switch translation
499 sanitize_title(unsigned char *title
)
501 int len
= strlen(title
);
506 if (title
[len
] < ' ' || title
[len
] == NBSP_CHAR
)
509 trim_chars(title
, ' ', NULL
);
512 /** Returns 0 if @a url contains invalid chars, 1 if ok.
513 * It trims starting/ending spaces. */
515 sanitize_url(unsigned char *url
)
517 int len
= strlen(url
);
525 trim_chars(url
, ' ', NULL
);
530 int c_tolower(int c
) {
533 case 'A': return 'a';
534 case 'B': return 'b';
535 case 'C': return 'c';
536 case 'D': return 'd';
537 case 'E': return 'e';
538 case 'F': return 'f';
539 case 'G': return 'g';
540 case 'H': return 'h';
541 case 'I': return 'i';
542 case 'J': return 'j';
543 case 'K': return 'k';
544 case 'L': return 'l';
545 case 'M': return 'm';
546 case 'N': return 'n';
547 case 'O': return 'o';
548 case 'P': return 'p';
549 case 'Q': return 'q';
550 case 'R': return 'r';
551 case 'S': return 's';
552 case 'T': return 't';
553 case 'U': return 'u';
554 case 'V': return 'v';
555 case 'W': return 'w';
556 case 'X': return 'x';
557 case 'Y': return 'y';
558 case 'Z': return 'z';
563 int c_toupper(int c
) {
565 case 'a': return 'A';
566 case 'b': return 'B';
567 case 'c': return 'C';
568 case 'd': return 'D';
569 case 'e': return 'E';
570 case 'f': return 'F';
571 case 'g': return 'G';
572 case 'h': return 'H';
573 case 'i': return 'I';
574 case 'j': return 'J';
575 case 'k': return 'K';
576 case 'l': return 'L';
577 case 'm': return 'M';
578 case 'n': return 'N';
579 case 'o': return 'O';
580 case 'p': return 'P';
581 case 'q': return 'Q';
582 case 'r': return 'R';
583 case 's': return 'S';
584 case 't': return 'T';
585 case 'u': return 'U';
586 case 'v': return 'V';
587 case 'w': return 'W';
588 case 'x': return 'X';
589 case 'y': return 'Y';
590 case 'z': return 'Z';
595 int c_isupper (int c
)
599 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
600 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
601 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
602 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
610 int c_islower (int c
)
614 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
615 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
616 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
617 case 's': case 't': case 'u': case 'v': case 'w': case 'x':