vfs: check userland buffers before reading them.
[haiku.git] / src / add-ons / kernel / file_systems / googlefs / string_utils.c
blob4482c72cb0353616fe453e81c548ba4c166beacf
1 /*
2 * Copyright 2004-2008, François Revol, <revol@free.fr>.
3 * Distributed under the terms of the MIT License.
4 */
#include <ctype.h>
#include <malloc.h>
#include <string.h>

#include "string_utils.h"
10 //#define TESTME
12 #ifdef _KERNEL_MODE
13 #define printf dprintf
14 #undef TESTME
15 #endif
/*
 * Escape a string so that it can be embedded in a URL query.
 *
 * Bytes from the "unreserved" set of RFC 2396 are copied unchanged, a space
 * is turned into '+', and every other byte is emitted as "%XX" using
 * uppercase hexadecimal digits.
 *
 * Returns a newly malloc()ed, NUL-terminated string which the caller must
 * free(), or NULL if str is NULL, the required size cannot be represented,
 * or the allocation fails.
 */
char *urlify_string(const char *str)
{
	static const char allowed[] = "abcdefghijklmnopqrstuvwxyz"
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"0123456789"
		"-_.!~*'()"; /* cf. RFC 2396 */
	static const char hex[] = "0123456789ABCDEF";
	char *dst, *d;
	const char *p;
	size_t len;

	if (!str)
		return NULL;

	len = strlen(str);
	/* refuse lengths for which len * 3 + 1 would wrap around */
	if (len > ((size_t)-1 - 1) / 3)
		return NULL;

	/*
	 * Worst case every input byte expands to three output bytes ("%XX");
	 * the extra byte holds the terminating NUL.
	 */
	dst = malloc(len * 3 + 1);
	if (!dst)
		return NULL;

	for (p = str, d = dst; *p; p++) {
		/* *p is never '\0' here, so strchr() cannot match the terminator */
		if (strchr(allowed, *p)) {
			*d++ = *p;
		} else if (*p == ' ') {
			*d++ = '+';
		} else {
			/* go through unsigned char so bytes >= 0x80 index hex[] correctly */
			unsigned char byte = *(const unsigned char *)p;

			*d++ = '%';
			*d++ = hex[(byte >> 4) & 0x0F];
			*d++ = hex[byte & 0x0F];
		}
	}
	*d = '\0';
	return dst;
}
50 // cf. http://www.htmlhelp.com/reference/html40/entities/
/*
 * Named character references recognised by unentitify_string().
 *
 * Each row is { entity name without '&' and ';', replacement text }.
 * The table ends with a { NULL, NULL } row.
 *
 * unentitify_string() sizes its output buffer from the length of its input,
 * so a replacement must never be longer than the "&name;" text it replaces.
 */
static const char *const entities_tab[][2] = {
	{ "lt", "<" },
	{ "gt", ">" },
	{ "amp", "&" },
	{ "nbsp", " " },
	{ "quot", "\"" },
	{ "raquo", "»" },
	{ "laquo", "«" },
	{ "ccedil", "ç" },
	// grave
	{ "agrave", "à" },
	{ "egrave", "è" },
	// acute
	{ "aacute", "á" },
	{ "eacute", "é" },
	// circ
	{ "acirc", "â" },
	{ "ecirc", "ê" },
	{ "icirc", "î" },
	{ "ocirc", "ô" },
	{ "ucirc", "û" },
	{ "copy", "©" },
	{ "trade", "™" },
	{ NULL, NULL },
};
79 char *unentitify_string(const char *str)
81 char *dst, *d;
82 const char *p;
83 const char *hex = "0123456789abcdef";
84 int i;
85 if (!str)
86 return NULL;
87 // hacky, but safe
88 dst = malloc(strlen(str)+2);
89 if (!dst)
90 return NULL;
91 for (p = str, d = dst; *p; p++) {
92 if (*p != '&')
93 *d++ = *p;
94 /* those case convert to binary, but won't check for valid multibyte UTF-8 sequences */
95 else if ((p[1] == '#') && p[2] && p[3] && (p[4] == ';') &&
96 isdigit(p[2]) &&
97 isdigit(p[3])) {
98 /* &#nn; */
99 char c = ((p[2]) - '0') * 10 +
100 ((p[3]) - '0');
101 *d++ = c;
102 p += 4;
103 } else if ((p[1] == '#') && p[2] && p[3] && p[4] && (p[5] == ';') &&
104 isdigit(p[2]) &&
105 isdigit(p[3]) &&
106 isdigit(p[4])) {
107 /* &#nnn; */
108 char c = ((p[2]) - '0') * 100 +
109 ((p[3]) - '0') * 10 +
110 ((p[4]) - '0');
111 *d++ = c;
112 p += 5;
113 } else if ((p[1] == '#') && (p[2] == 'x') && p[3] && p[4] && (p[5] == ';') &&
114 strchr(hex, tolower(p[3])) &&
115 strchr(hex, tolower(p[4]))) {
116 /* &#xnn; */
117 char c = (strchr(hex, tolower(p[3])) - hex) << 4 |
118 (strchr(hex, tolower(p[4])) - hex);
119 *d++ = c;
120 p += 5;
121 } else {
122 char buf[20];
123 strncpy(buf, p+1, 20);
124 buf[19] = '\0';
125 if (!strchr(buf, ';')) {
126 *d++ = *p;
127 continue;
129 *(strchr(buf, ';')) = '\0';
130 for (i = 0; entities_tab[i][0]; i++) {
131 if (!strcmp(buf, entities_tab[i][0])) {
132 strcpy(d, entities_tab[i][1]);
133 d += strlen(d);
134 p += strlen(entities_tab[i][0]) + 1;
135 break;
138 if (!entities_tab[i][0]) /* not found */
139 *d++ = '&';
142 *d = '\0';
143 return dst;
146 #ifdef TESTME
/*
 * Stand-alone test driver, only built when TESTME is defined:
 * decodes the first command line argument and prints the result in quotes.
 * Exits with 1 when no argument is given or decoding fails, 0 otherwise.
 */
int main(int argc, char **argv)
{
	char *p;

	if (argc < 2)
		return 1;

	p = unentitify_string(argv[1]);
	if (!p)	/* allocation failed; do not hand NULL to printf("%s") */
		return 1;

	printf("'%s'\n", p);
	free(p);
	/* NOTE(review): presumably here to exercise the allocator after decoding - confirm */
	free(malloc(10));
	return 0;
}
158 #endif