2 * Claws Mail -- a GTK based, lightweight, and fast e-mail client
3 * Copyright (C) 2017 Ricardo Mones and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "claws-features.h"
/* Capacity of the scratch buffers (`gchar b[ENTITY_MAX_LEN]`) that hold an
 * entity's name or digits while it is being parsed; the extraction loop in
 * entity_extract_to_buffer() is also bounded by this value. */
25 #define ENTITY_MAX_LEN 8
/* Byte budget for one decoded character: entity_decode_numeric() allocates
 * DECODED_MAX_LEN + 1 zeroed bytes and lets g_unichar_to_utf8() write into
 * them. */
26 #define DECODED_MAX_LEN 6
/* Lookup table for named entities. Starts out NULL and is created by
 * entity_decode_symbol() the first time it finds it NULL. */
28 static GHashTable
*symbol_table
= NULL
;
/* Element type of symbolic_entities[]. The struct definition itself is not
 * part of this listing; the code below accesses `.key` and `.value` members. */
30 typedef struct _EntitySymbol EntitySymbol
;
/* Table of named entities: each element pairs an entity name (without the
 * leading '&' and trailing ';') with its replacement text as a UTF-8 string.
 *
 * The entries visible here spell their values as hex byte escapes:
 *   emsp   -> U+2003 EM SPACE              nbsp   -> U+00A0 NO-BREAK SPACE
 *   ensp   -> U+2002 EN SPACE              thinsp -> U+2009 THIN SPACE
 *   lrm    -> U+200E LEFT-TO-RIGHT MARK    zwj    -> U+200D ZERO WIDTH JOINER
 *   rlm    -> U+200F RIGHT-TO-LEFT MARK    zwnj   -> U+200C ZERO WIDTH NON-JOINER
 *
 * NOTE(review): this listing is not the whole initializer (the embedded line
 * numbers jump and the closing brace is missing). entity_decode_symbol()
 * iterates until it meets an element whose `.key` is NULL, so the full table
 * presumably ends with such a sentinel element -- confirm against the
 * complete file. */
38 /* in alphabetical order with upper-case version first */
39 static EntitySymbol symbolic_entities
[] = {
103 {"emsp", "\xE2\x80\x83"},
104 {"ensp", "\xE2\x80\x82"},
166 {"lrm", "\xE2\x80\x8E"},
180 {"nbsp", "\xC2\xA0"},
249 {"rlm", "\xE2\x80\x8F"},
280 {"thinsp", "\xE2\x80\x89"},
315 {"zwj", "\xE2\x80\x8D"},
316 {"zwnj", "\xE2\x80\x8C"},
/* entity_extract_to_buffer: helper shared by the numeric and symbolic
 * decoders; both pass the text to scan as `p`, a local
 * `gchar b[ENTITY_MAX_LEN]` as `b`, and treat a NULL result as failure.
 *
 * Only the two conditions below are visible in this listing:
 *  - the loop keeps going while *p is neither the terminating NUL nor ';'
 *    and `i` is still below ENTITY_MAX_LEN;
 *  - the following check is true when the scan did not stop on ';', when
 *    `i` is 0, or when `i` reached ENTITY_MAX_LEN.
 *
 * NOTE(review): the loop body, the declaration of `i` and the statements
 * guarded by the check are elided here. Presumably the loop copies bytes
 * from `p` into `b` with `i` as the count, and the check returns NULL --
 * confirm against the complete file. */
320 static gchar
* entity_extract_to_buffer(gchar
*p
, gchar b
[])
324 while (*p
!= '\0' && *p
!= ';' && i
< ENTITY_MAX_LEN
) {
328 if (*p
!= ';' || i
== 0 || i
== ENTITY_MAX_LEN
)
/* entity_decode_numeric: decodes a numeric character reference into a UTF-8
 * string. Called from entity_decode() with its `p` pointer.
 *
 * Steps visible in this listing:
 *  1. entity_extract_to_buffer() fills the local buffer `b` starting at `p`;
 *     a NULL result is tested for.
 *  2. g_ascii_strtoll() converts `b` to a number, base 16 when `hex` is set
 *     and base 10 otherwise; the result is stored in `c`.
 *  3. One branch returns a newly allocated copy of U+FFFD (bytes EF BF BD),
 *     which the existing comment describes as the answer for an unprintable
 *     character.
 *  4. g_unichar_validate() rejects values that are not valid Unicode; a
 *     debug message is logged in that case.
 *  5. Otherwise DECODED_MAX_LEN + 1 zeroed bytes are allocated and
 *     g_unichar_to_utf8() encodes `c` into them; a debug message exists for
 *     the case where that conversion fails.
 *
 * NOTE(review): the statements that advance `p` past the reference prefix,
 * that set `hex`, the condition guarding step 3, the declarations of `c` and
 * `ret`, and every return other than the one in step 3 are elided here.
 * Presumably the failure paths return NULL and the success path returns
 * `res`, which the caller would then own -- confirm against the complete
 * file. */
335 static gchar
*entity_decode_numeric(gchar
*str
)
337 gchar b
[ENTITY_MAX_LEN
];
338 gchar
*p
= str
, *res
;
339 gboolean hex
= FALSE
;
354 if (entity_extract_to_buffer (p
, b
) == NULL
)
358 c
= g_ascii_strtoll (b
, NULL
, (hex
? 16 : 10));
361 /* An unprintable character; return the Unicode replacement symbol */
362 return g_strdup("\xef\xbf\xbd");
364 if (!g_unichar_validate(c
)) {
365 /* Make sure the character is valid Unicode */
366 debug_print("Numeric reference '&#%s;' is invalid in Unicode codespace\n", b
);
370 res
= g_malloc0 (DECODED_MAX_LEN
+ 1);
371 ret
= g_unichar_to_utf8 (c
, res
);
373 debug_print("Failed to convert unicode character %u to UTF-8\n", c
);
/* entity_decode_symbol: decodes a named entity by looking its name up in
 * symbol_table. Called from entity_decode() with its `p` pointer.
 *
 * Steps visible in this listing:
 *  1. entity_extract_to_buffer() fills the local buffer `b` from `str`; a
 *     NULL result is tested for.
 *  2. If symbol_table is still NULL it is created with g_hash_table_new()
 *     using string hashing/equality, then filled with every
 *     symbolic_entities[] element up to the first one whose `.key` is NULL.
 *     Keys and values inserted are the table's own pointers (no copies are
 *     made, and no destroy callbacks are registered). The number of inserted
 *     symbols is logged.
 *  3. `b` is looked up and a g_strdup() copy of the stored value is returned,
 *     so the returned string is newly allocated.
 *
 * NOTE(review): the declarations of `i` and `decoded`, the statement guarded
 * by the NULL test in step 1 and any check between the lookup and the return
 * are elided here; what is returned for an unknown name cannot be determined
 * from this listing -- confirm against the complete file. */
381 static gchar
*entity_decode_symbol(gchar
*str
)
383 gchar b
[ENTITY_MAX_LEN
];
386 if (entity_extract_to_buffer (str
, b
) == NULL
)
389 if (symbol_table
== NULL
) {
392 symbol_table
= g_hash_table_new (g_str_hash
, g_str_equal
);
393 for (i
= 0; symbolic_entities
[i
].key
!= NULL
; ++i
) {
394 g_hash_table_insert (symbol_table
,
395 symbolic_entities
[i
].key
, symbolic_entities
[i
].value
);
397 debug_print("initialized entities table with %d symbols\n", i
);
400 decoded
= g_hash_table_lookup (symbol_table
, b
);
402 return g_strdup (decoded
);
/* entity_decode: the only function in this listing not declared `static`;
 * it dispatches an entity reference to the numeric or the symbolic decoder.
 *
 * Visible in this listing:
 *  - a guard that is true when `p` is NULL or does not point at '&';
 *  - one path handing `p` to entity_decode_numeric() and another handing it
 *    to entity_decode_symbol(), each returning that helper's result.
 *
 * NOTE(review): how `p` is derived from `str`, what the guard returns, how
 * `p` is advanced and the condition that selects the numeric path are elided
 * here. Presumably the guard returns NULL and a '#' right after the '&'
 * selects the numeric decoder -- confirm against the complete file. */
407 gchar
*entity_decode(gchar
*str
)
410 if (p
== NULL
|| *p
!= '&')
416 return entity_decode_numeric(p
);
418 return entity_decode_symbol(p
);