components/library/idnkit/patches/002-utf8-validation.patch

   1 Oracle patch to reject ill-formed UTF-8 (overlong 3/4-byte forms, surrogates, > U+10FFFF) in lib/utf32.c and lib/utf8.c
   2 Sent upstream via e-mail
   3 diff --git a/lib/utf32.c b/lib/utf32.c
   4 --- a/lib/utf32.c
   5 +++ b/lib/utf32.c
   6 @@ -342,6 +342,10 @@
   7         unsigned long *utf32p = utf32;
   8         unsigned long v, min;
   9         unsigned char c;
  10 +#ifdef  SUN_CHANGE
  11 +       unsigned char first_byte;
  12 +#endif  /* SUN_CHANGE */
  13 +
  14         int width;
  15         int i;
  16
  17 @@ -384,6 +388,24 @@
  18                         goto ret;
  19                 }
  20
  21 +#ifdef  SUN_CHANGE
  22 +               first_byte = c;
  23 +               for (i = width - 1; i > 0; i--) {
  24 +                       c = *utf8p++;
  25 +                       if ((first_byte == 0xe0 && c < 0xa0) ||
  26 +                           (first_byte == 0xed && c > 0x9f) ||
  27 +                           (first_byte == 0xf0 && c < 0x90) ||
  28 +                           (first_byte == 0xf4 && c > 0x8f) ||
  29 +                           c < 0x80 || 0xc0 <= c) {
  30 +                               WARNING(("idn__utf32_fromutf8: "
  31 +                                        "invalid character\n"));
  32 +                               r = idn_invalid_encoding;
  33 +                               goto ret;
  34 +                       }
  35 +                       v = (v << 6) | (c & 0x3f);
  36 +                       first_byte = 0;
  37 +               }
  38 +#else
  39                 for (i = width - 1; i > 0; i--) {
  40                         c = *utf8p++;
  41                         if (c < 0x80 || 0xc0 <= c) {
  42 @@ -394,6 +416,7 @@
  43                         }
  44                         v = (v << 6) | (c & 0x3f);
  45                 }
  46 +#endif  /* SUN_CHANGE */
  47
  48                 if (v < min || v > UTF32_MAX) {
  49                         WARNING(("idn__utf32_fromutf8: invalid character\n"));
  50 diff --git a/lib/utf8.c b/lib/utf8.c
  51 --- a/lib/utf8.c
  52 +++ b/lib/utf8.c
  53 @@ -205,6 +205,16 @@
  54
  55  #define VALID_CONT_BYTE(c)     (0x80 <= (c) && (c) < 0xc0)
  56
  57 +#ifdef        SUN_CHANGE
  58 +#define       INVALID_CONT_BYTE(first_byte, next_byte) \
  59 +      (((first_byte) == 0xe0 && (next_byte) < 0xa0) || \
  60 +       ((first_byte) == 0xed && (next_byte) > 0x9f) || \
  61 +       ((first_byte) == 0xf0 && (next_byte) < 0x90) || \
  62 +       ((first_byte) == 0xf4 && (next_byte) > 0x8f) || \
  63 +       (next_byte) < 0x80 || \
  64 +       (next_byte) > 0xbf)
  65 +#endif        /* SUN_CHANGE */
  66 +
  67  /*
  68   * Determine number of bytes in next multibyte UTF-8 character.
  69   */
  70 @@ -226,7 +236,11 @@
  71         unsigned long v;
  72         unsigned long min;
  73         const unsigned char *p = (const unsigned char *)s;
  74 -       int c;
  75 +#ifdef  SUN_CHANGE
  76 +       unsigned long c;
  77 +#else
  78 +        int c;
  79 +#endif  /* SUN_CHANGE */
  80         int width;
  81         int rest;
  82
  83 @@ -269,10 +283,17 @@
  84
  85         rest = width - 1;
  86         while (rest-- > 0) {
  87 +#ifdef  SUN_CHANGE
  88 +               if (INVALID_CONT_BYTE(c, (*p)))
  89 +#else
  90                 if (!VALID_CONT_BYTE(*p))
  91 +#endif  /* SUN_CHANGE */
  92                         return (0);
  93                 v = (v << 6) | (*p & 0x3f);
  94                 p++;
  95 +#ifdef  SUN_CHANGE
  96 +                c = 0;
  97 +#endif  /* SUN_CHANGE */
  98         }
  99
 100         if (v < min)
 101 @@ -382,8 +403,15 @@
 102                     break;
 103                 p--;
 104         }
 105 +
 106 +#ifdef  SUN_CHANGE
 107 +       if (p < t || UTF8_WIDTH(*p) == 0 ||
 108 +           (UTF8_WIDTH(*p) >= 2 && (const unsigned char *)s > p &&
 109 +            INVALID_CONT_BYTE((*p), (*(p + 1)))))
 110 +#else
 111         if (p < t || UTF8_WIDTH(*p) == 0)
 112 -               return (NULL);
 113 +#endif  /* SUN_CHANGE */
 114 +                return (NULL);
 115
 116         return ((char *)p);
 117  }