1 Oracle patch to detect invalid UTF-8 sequences (overlong forms, surrogates, and values above U+10FFFF)
2 Sent upstream via e-mail
3 diff --git a/lib/utf32.c b/lib/utf32.c
7 unsigned long *utf32p = utf32;
11 + unsigned char first_byte;
12 +#endif /* SUN_CHANGE */
23 + for (i = width - 1; i > 0; i--) {
25 + if ((first_byte == 0xe0 && c < 0xa0) ||
26 + (first_byte == 0xed && c > 0x9f) ||
27 + (first_byte == 0xf0 && c < 0x90) ||
28 + (first_byte == 0xf4 && c > 0x8f) ||
29 + c < 0x80 || 0xc0 <= c) {
30 + WARNING(("idn__utf32_fromutf8: "
31 + "invalid character\n"));
32 + r = idn_invalid_encoding;
35 + v = (v << 6) | (c & 0x3f);
39 for (i = width - 1; i > 0; i--) {
41 if (c < 0x80 || 0xc0 <= c) {
44 v = (v << 6) | (c & 0x3f);
46 +#endif /* SUN_CHANGE */
48 if (v < min || v > UTF32_MAX) {
49 WARNING(("idn__utf32_fromutf8: invalid character\n"));
50 diff --git a/lib/utf8.c b/lib/utf8.c
55 #define VALID_CONT_BYTE(c) (0x80 <= (c) && (c) < 0xc0)
58 +#define INVALID_CONT_BYTE(first_byte, next_byte) \
59 + (((first_byte) == 0xe0 && (next_byte) < 0xa0) || \
60 + ((first_byte) == 0xed && (next_byte) > 0x9f) || \
61 + ((first_byte) == 0xf0 && (next_byte) < 0x90) || \
62 + ((first_byte) == 0xf4 && (next_byte) > 0x8f) || \
63 + (next_byte) < 0x80 || \
65 +#endif /* SUN_CHANGE */
68 * Determine number of bytes in next multibyte UTF-8 character.
73 const unsigned char *p = (const unsigned char *)s;
79 +#endif /* SUN_CHANGE */
88 + if (INVALID_CONT_BYTE(c, (*p)))
90 if (!VALID_CONT_BYTE(*p))
91 +#endif /* SUN_CHANGE */
93 v = (v << 6) | (*p & 0x3f);
97 +#endif /* SUN_CHANGE */
107 + if (p < t || UTF8_WIDTH(*p) == 0 ||
108 + (UTF8_WIDTH(*p) >= 2 && (const unsigned char *)s > p &&
109 + INVALID_CONT_BYTE((*p), (*(p + 1)))))
111 if (p < t || UTF8_WIDTH(*p) == 0)
113 +#endif /* SUN_CHANGE */