Fix CID 1491093: attrib leaked if attvalue is null
[claws.git] / src / entity.c
blob1c9098945c2a65d0aeb09fbf76a8caf363d3a761
1 /*
2 * Claws Mail -- a GTK based, lightweight, and fast e-mail client
3 * Copyright (C) 2017 Ricardo Mones and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #include "claws-features.h"
21 #endif
23 #include "utils.h"
25 #define ENTITY_MAX_LEN 8
26 #define DECODED_MAX_LEN 6
28 static GHashTable *symbol_table = NULL;
30 typedef struct _EntitySymbol EntitySymbol;
32 struct _EntitySymbol
34 gchar *const key;
35 gchar *const value;
38 /* in alphabetical order with upper-case version first */
39 static EntitySymbol symbolic_entities[] = {
40 /* A */
41 {"Aacute", "Á"},
42 {"aacute", "á"},
43 {"Acirc", "Â"},
44 {"acirc", "â"},
45 {"acute", "´"},
46 {"AElig", "Æ"},
47 {"aelig", "æ"},
48 {"Agrave", "À"},
49 {"agrave", "à"},
50 {"alefsym", "ℵ"},
51 {"Alpha", "Α"},
52 {"alpha", "α"},
53 {"amp", "&"},
54 {"and", "∧"},
55 {"ang", "∠"},
56 {"apos", "'"},
57 {"Aring", "Å"},
58 {"aring", "å"},
59 {"asymp", "≈"},
60 {"Atilde", "Ã"},
61 {"atilde", "ã"},
62 {"Auml", "Ä"},
63 {"auml", "ä"},
64 /* B */
65 {"bdquo", "„"},
66 {"Beta", "Β"},
67 {"beta", "β"},
68 {"brvbar", "¦"},
69 {"bull", "•"},
70 /* C */
71 {"cap", "∩"},
72 {"Ccedil", "Ç"},
73 {"ccedil", "ç"},
74 {"cedil", "¸"},
75 {"cent", "¢"},
76 {"Chi", "Χ"},
77 {"chi", "χ"},
78 {"circ", "ˆ"},
79 {"clubs", "♣"},
80 {"cong", "≅"},
81 {"copy", "©"},
82 {"crarr", "↵"},
83 {"cup", "∪"},
84 {"curren", "¤"},
85 /* D */
86 {"dagger", "†"},
87 {"Dagger", "‡"},
88 {"dArr", "⇓"},
89 {"darr", "↓"},
90 {"deg", "°"},
91 {"Delta", "Δ"},
92 {"delta", "δ"},
93 {"diams", "♦"},
94 {"divide", "÷"},
95 /* E */
96 {"Eacute", "É"},
97 {"eacute", "é"},
98 {"Ecirc", "Ê"},
99 {"ecirc", "ê"},
100 {"Egrave", "È"},
101 {"egrave", "è"},
102 {"empty", "∅"},
103 {"emsp", "\xE2\x80\x83"},
104 {"ensp", "\xE2\x80\x82"},
105 {"Epsilon", "Ε"},
106 {"epsilon", "ε"},
107 {"equiv", "≡"},
108 {"Eta", "Η"},
109 {"eta", "η"},
110 {"ETH", "Ð"},
111 {"eth", "ð"},
112 {"Euml", "Ë"},
113 {"euml", "ë"},
114 {"euro", "€"},
115 {"exist", "∃"},
116 /* F */
117 {"fnof", "ƒ"},
118 {"forall", "∀"},
119 {"frac12", "½"},
120 {"frac14", "¼"},
121 {"frac34", "¾"},
122 {"frasl", "⁄"},
123 /* G */
124 {"Gamma", "Γ"},
125 {"gamma", "γ"},
126 {"ge", "≥"},
127 {"gt", ">"},
128 /* H */
129 {"hArr", "⇔"},
130 {"harr", "↔"},
131 {"hearts", "♥"},
132 {"hellip", "…"},
133 /* I */
134 {"Iacute", "Í"},
135 {"iacute", "í"},
136 {"IArr", "⇐"},
137 {"Icirc", "Î"},
138 {"icirc", "î"},
139 {"iexcl", "¡"},
140 {"Igrave", "Ì"},
141 {"igrave", "ì"},
142 {"image", "ℑ"},
143 {"infin", "∞"},
144 {"int", "∫"},
145 {"Iota", "Ι"},
146 {"iota", "ι"},
147 {"iquest", "¿"},
148 {"isin", "∈"},
149 {"Iuml", "Ï"},
150 {"iuml", "ï"},
151 /* K */
152 {"Kappa", "Κ"},
153 {"kappa", "κ"},
154 /* L */
155 {"Lambda", "Λ"},
156 {"lambda", "λ"},
157 {"lang", "〈"},
158 {"laquo", "«"},
159 {"larr", "←"},
160 {"lceil", "⌈"},
161 {"ldquo", "“"},
162 {"le", "≤"},
163 {"lfloor", "⌊"},
164 {"lowast", "∗"},
165 {"loz", "◊"},
166 {"lrm", "\xE2\x80\x8E"},
167 {"lsaquo", "‹"},
168 {"lsquo", "‘"},
169 {"lt", "<"},
170 /* M */
171 {"macr", "¯"},
172 {"mdash", "—"},
173 {"micro", "µ"},
174 {"middot", "·"},
175 {"minus", "−"},
176 {"Mu", "Μ"},
177 {"mu", "μ"},
178 /* N */
179 {"nabla", "∇"},
180 {"nbsp", "\xC2\xA0"},
181 {"ndash", "–"},
182 {"ne", "≠"},
183 {"ni", "∋"},
184 {"not", "¬"},
185 {"notin", "∉"},
186 {"nsub", "⊄"},
187 {"Ntilde", "Ñ"},
188 {"ntilde", "ñ"},
189 {"Nu", "Ν"},
190 {"nu", "ν"},
191 /* O */
192 {"Oacute", "Ó"},
193 {"oacute", "ó"},
194 {"Ocirc", "Ô"},
195 {"ocirc", "ô"},
196 {"OElig", "Œ"},
197 {"oelig", "œ"},
198 {"Ograve", "Ò"},
199 {"ograve", "ò"},
200 {"oline", "‾"},
201 {"Omega", "Ω"},
202 {"omega", "ω"},
203 {"Omicron", "Ο"},
204 {"omicron", "ο"},
205 {"oplus", "⊕"},
206 {"or", "∨"},
207 {"ordf", "ª"},
208 {"ordm", "º"},
209 {"Oslash", "Ø"},
210 {"oslash", "ø"},
211 {"Otilde", "Õ"},
212 {"otilde", "õ"},
213 {"otimes", "⊗"},
214 {"Ouml", "Ö"},
215 {"ouml", "ö"},
216 /* P */
217 {"para", "¶"},
218 {"part", "∂"},
219 {"permil", "‰"},
220 {"perp", "⊥"},
221 {"Phi", "Φ"},
222 {"phi", "φ"},
223 {"Pi", "Π"},
224 {"pi", "π"},
225 {"piv", "ϖ"},
226 {"plusmn", "±"},
227 {"pound", "£"},
228 {"Prime", "″"},
229 {"prime", "′"},
230 {"prod", "∏"},
231 {"prop", "∝"},
232 {"Psi", "Ψ"},
233 {"psi", "ψ"},
234 /* Q */
235 {"quot", "\""},
236 /* R */
237 {"radic", "√"},
238 {"rang", "〉"},
239 {"raquo", "»"},
240 {"rArr", "⇒"},
241 {"rarr", "→"},
242 {"rceil", "⌉"},
243 {"rdquo", "”"},
244 {"real", "ℜ"},
245 {"reg", "®"},
246 {"rfloor", "⌋"},
247 {"Rho", "Ρ"},
248 {"rho", "ρ"},
249 {"rlm", "\xE2\x80\x8F"},
250 {"rsaquo", "›"},
251 {"rsquo", "’"},
252 /* S */
253 {"sbquo", "‚"},
254 {"Scaron", "Š"},
255 {"scaron", "š"},
256 {"sdot", "⋅"},
257 {"sect", "§"},
258 {"shy", "\xC2\xAD"},
259 {"Sigma", "Σ"},
260 {"sigma", "σ"},
261 {"sigmaf", "ς"},
262 {"sim", "∼"},
263 {"spades", "♠"},
264 {"sub", "⊂"},
265 {"sube", "⊆"},
266 {"sum", "∑"},
267 {"sup", "⊃"},
268 {"sup1", "¹"},
269 {"sup2", "²"},
270 {"sup3", "³"},
271 {"supe", "⊇"},
272 {"szlig", "ß"},
273 /* T */
274 {"Tau", "Τ"},
275 {"tau", "τ"},
276 {"there4", "∴"},
277 {"Theta", "Θ"},
278 {"theta", "θ"},
279 {"thetasym", "ϑ"},
280 {"thinsp", "\xE2\x80\x89"},
281 {"THORN", "Þ"},
282 {"thorn", "þ"},
283 {"tilde", "˜"},
284 {"times", "×"},
285 {"trade", "™"},
286 /* U */
287 {"Uacute", "Ú"},
288 {"uacute", "ú"},
289 {"uArr", "⇑"},
290 {"uarr", "↑"},
291 {"Ucirc", "Û"},
292 {"ucirc", "û"},
293 {"Ugrave", "Ù"},
294 {"ugrave", "ù"},
295 {"uml", "¨"},
296 {"upsih", "ϒ"},
297 {"Upsilon", "Υ"},
298 {"upsilon", "υ"},
299 {"Uuml", "Ü"},
300 {"uuml", "ü"},
301 /* W */
302 {"weierp", "℘"},
303 /* X */
304 {"Xi", "Ξ"},
305 {"xi", "ξ"},
306 /* Y */
307 {"Yacute", "Ý"},
308 {"yacute", "ý"},
309 {"yen", "¥"},
310 {"Yuml", "Ÿ"},
311 {"yuml", "ÿ"},
312 /* Z */
313 {"Zeta", "Ζ"},
314 {"zeta", "ζ"},
315 {"zwj", "\xE2\x80\x8D"},
316 {"zwnj", "\xE2\x80\x8C"},
317 {NULL, NULL}
320 static gchar* entity_extract_to_buffer(gchar *p, gchar b[])
322 gint i = 0;
324 while (*p != '\0' && *p != ';' && i < ENTITY_MAX_LEN) {
325 b[i] = *p;
326 ++i, ++p;
328 if (*p != ';' || i == 0 || i == ENTITY_MAX_LEN)
329 return NULL;
330 b[i] = '\0';
332 return b;
335 static gchar *entity_decode_numeric(gchar *str)
337 gchar b[ENTITY_MAX_LEN];
338 gchar *p = str, *res;
339 gboolean hex = FALSE;
340 gunichar c = 0;
341 gint ret;
343 ++p;
344 if (*p == '\0')
345 return NULL;
347 if (*p == 'x') {
348 hex = TRUE;
349 ++p;
350 if (*p == '\0')
351 return NULL;
354 if (entity_extract_to_buffer (p, b) == NULL)
355 return NULL;
357 if (strlen(b) > 0)
358 c = g_ascii_strtoll (b, NULL, (hex ? 16 : 10));
360 if (c < 32)
361 /* An unprintable character; return the Unicode replacement symbol */
362 return g_strdup("\xef\xbf\xbd");
364 if (!g_unichar_validate(c)) {
365 /* Make sure the character is valid Unicode */
366 debug_print("Numeric reference '&#%s;' is invalid in Unicode codespace\n", b);
367 return NULL;
370 res = g_malloc0 (DECODED_MAX_LEN + 1);
371 ret = g_unichar_to_utf8 (c, res);
372 if (ret == 0) {
373 debug_print("Failed to convert unicode character %u to UTF-8\n", c);
374 g_free(res);
375 res = NULL;
378 return res;
381 static gchar *entity_decode_symbol(gchar *str)
383 gchar b[ENTITY_MAX_LEN];
384 gchar *decoded;
386 if (entity_extract_to_buffer (str, b) == NULL)
387 return NULL;
389 if (symbol_table == NULL) {
390 gint i;
392 symbol_table = g_hash_table_new (g_str_hash, g_str_equal);
393 for (i = 0; symbolic_entities[i].key != NULL; ++i) {
394 g_hash_table_insert (symbol_table,
395 symbolic_entities[i].key, symbolic_entities[i].value);
397 debug_print("initialized entities table with %d symbols\n", i);
400 decoded = g_hash_table_lookup (symbol_table, b);
401 if (decoded != NULL)
402 return g_strdup (decoded);
404 return NULL;
407 gchar *entity_decode(gchar *str)
409 gchar *p = str;
410 if (p == NULL || *p != '&')
411 return NULL;
412 ++p;
413 if (*p == '\0')
414 return NULL;
415 if (*p == '#')
416 return entity_decode_numeric(p);
417 else
418 return entity_decode_symbol(p);