2 * Copyright 2004-2008, François Revol, <revol@free.fr>.
3 * Distributed under the terms of the MIT License.
8 #include "string_utils.h"
13 #define printf dprintf
19 char *urlify_string(const char *str
)
23 const char *allowed
= "abcdefghijklmnopqrstuvwxyz" \
24 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
26 "-_.!~*'()"; /* cf. RFC 2396 */
27 const char *hex
= "0123456789ABCDEF";
31 dst
= malloc(strlen(str
)*3);
34 for (p
= str
, d
= dst
; *p
; p
++) {
35 if (strchr(allowed
, *p
))
42 *d
++ = hex
[(*(unsigned char *)p
>> 4) & 0x0F];
43 *d
++ = hex
[(*(unsigned char *)p
) & 0x0F];
50 // cf. http://www.htmlhelp.com/reference/html40/entities/
52 static const char *entities_tab
[][2] = {
79 char *unentitify_string(const char *str
)
83 const char *hex
= "0123456789abcdef";
88 dst
= malloc(strlen(str
)+2);
91 for (p
= str
, d
= dst
; *p
; p
++) {
94 /* those case convert to binary, but won't check for valid multibyte UTF-8 sequences */
95 else if ((p
[1] == '#') && p
[2] && p
[3] && (p
[4] == ';') &&
99 char c
= ((p
[2]) - '0') * 10 +
103 } else if ((p
[1] == '#') && p
[2] && p
[3] && p
[4] && (p
[5] == ';') &&
108 char c
= ((p
[2]) - '0') * 100 +
109 ((p
[3]) - '0') * 10 +
113 } else if ((p
[1] == '#') && (p
[2] == 'x') && p
[3] && p
[4] && (p
[5] == ';') &&
114 strchr(hex
, tolower(p
[3])) &&
115 strchr(hex
, tolower(p
[4]))) {
117 char c
= (strchr(hex
, tolower(p
[3])) - hex
) << 4 |
118 (strchr(hex
, tolower(p
[4])) - hex
);
123 strncpy(buf
, p
+1, 20);
125 if (!strchr(buf
, ';')) {
129 *(strchr(buf
, ';')) = '\0';
130 for (i
= 0; entities_tab
[i
][0]; i
++) {
131 if (!strcmp(buf
, entities_tab
[i
][0])) {
132 strcpy(d
, entities_tab
[i
][1]);
134 p
+= strlen(entities_tab
[i
][0]) + 1;
138 if (!entities_tab
[i
][0]) /* not found */
147 int main(int argc
, char **argv
)
152 p
= unentitify_string(argv
[1]);