glib/ghostutils.c

   1 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
   2
   3 /* GLIB - Library of useful routines for C programming
   4  * Copyright (C) 2008 Red Hat, Inc.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General
  17  * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "config.h"
  21
  22 #include <string.h>
  23
  24 #include "ghostutils.h"
  25
  26 #include "garray.h"
  27 #include "gmem.h"
  28 #include "gstring.h"
  29 #include "gstrfuncs.h"
  30 #include "glibintl.h"
  31
  32
  33 /**
  34  * SECTION:ghostutils
  35  * @short_description: Internet hostname utilities
  36  *
  37  * Functions for manipulating internet hostnames; in particular, for
  38  * converting between Unicode and ASCII-encoded forms of
  39  * Internationalized Domain Names (IDNs).
  40  *
  41  * The
  42  * [Internationalized Domain Names for Applications (IDNA)](http://www.ietf.org/rfc/rfc3490.txt)
  43  * standards allow for the use
  44  * of Unicode domain names in applications, while providing
  45  * backward-compatibility with the old ASCII-only DNS, by defining an
  46  * ASCII-Compatible Encoding of any given Unicode name, which can be
  47  * used with non-IDN-aware applications and protocols. (For example,
  48  * "Παν語.org" maps to "xn--4wa8awb4637h.org".)
  49  **/
  50
  51 #define IDNA_ACE_PREFIX     "xn--"
  52 #define IDNA_ACE_PREFIX_LEN 4
  53
  54 /* Punycode constants, from RFC 3492. */
  55
  56 #define PUNYCODE_BASE          36
  57 #define PUNYCODE_TMIN           1
  58 #define PUNYCODE_TMAX          26
  59 #define PUNYCODE_SKEW          38
  60 #define PUNYCODE_DAMP         700
  61 #define PUNYCODE_INITIAL_BIAS  72
  62 #define PUNYCODE_INITIAL_N   0x80
  63
  64 #define PUNYCODE_IS_BASIC(cp) ((guint)(cp) < 0x80)
  65
  66 /* Encode/decode a single base-36 digit */
  67 static inline gchar
  68 encode_digit (guint dig)
  69 {
  70   if (dig < 26)
  71     return dig + 'a';
  72   else
  73     return dig - 26 + '0';
  74 }
  75
  76 static inline guint
  77 decode_digit (gchar dig)
  78 {
  79   if (dig >= 'A' && dig <= 'Z')
  80     return dig - 'A';
  81   else if (dig >= 'a' && dig <= 'z')
  82     return dig - 'a';
  83   else if (dig >= '0' && dig <= '9')
  84     return dig - '0' + 26;
  85   else
  86     return G_MAXUINT;
  87 }
  88
  89 /* Punycode bias adaptation algorithm, RFC 3492 section 6.1 */
  90 static guint
  91 adapt (guint    delta,
  92        guint    numpoints,
  93        gboolean firsttime)
  94 {
  95   guint k;
  96
  97   delta = firsttime ? delta / PUNYCODE_DAMP : delta / 2;
  98   delta += delta / numpoints;
  99
 100   k = 0;
 101   while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2)
 102     {
 103       delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
 104       k += PUNYCODE_BASE;
 105     }
 106
 107   return k + ((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta /
 108               (delta + PUNYCODE_SKEW));
 109 }
 110
 111 /* Punycode encoder, RFC 3492 section 6.3. The algorithm is
 112  * sufficiently bizarre that it's not really worth trying to explain
 113  * here.
 114  */
 115 static gboolean
 116 punycode_encode (const gchar *input_utf8,
 117                  gsize        input_utf8_length,
 118                  GString     *output)
 119 {
 120   guint delta, handled_chars, num_basic_chars, bias, j, q, k, t, digit;
 121   gunichar n, m, *input;
 122   glong input_length;
 123   gboolean success = FALSE;
 124
 125   /* Convert from UTF-8 to Unicode code points */
 126   input = g_utf8_to_ucs4 (input_utf8, input_utf8_length, NULL,
 127                           &input_length, NULL);
 128   if (!input)
 129     return FALSE;
 130
 131   /* Copy basic chars */
 132   for (j = num_basic_chars = 0; j < input_length; j++)
 133     {
 134       if (PUNYCODE_IS_BASIC (input[j]))
 135         {
 136           g_string_append_c (output, g_ascii_tolower (input[j]));
 137           num_basic_chars++;
 138         }
 139     }
 140   if (num_basic_chars)
 141     g_string_append_c (output, '-');
 142
 143   handled_chars = num_basic_chars;
 144
 145   /* Encode non-basic chars */
 146   delta = 0;
 147   bias = PUNYCODE_INITIAL_BIAS;
 148   n = PUNYCODE_INITIAL_N;
 149   while (handled_chars < input_length)
 150     {
 151       /* let m = the minimum {non-basic} code point >= n in the input */
 152       for (m = G_MAXUINT, j = 0; j < input_length; j++)
 153         {
 154           if (input[j] >= n && input[j] < m)
 155             m = input[j];
 156         }
 157
 158       if (m - n > (G_MAXUINT - delta) / (handled_chars + 1))
 159         goto fail;
 160       delta += (m - n) * (handled_chars + 1);
 161       n = m;
 162
 163       for (j = 0; j < input_length; j++)
 164         {
 165           if (input[j] < n)
 166             {
 167               if (++delta == 0)
 168                 goto fail;
 169             }
 170           else if (input[j] == n)
 171             {
 172               q = delta;
 173               for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
 174                 {
 175                   if (k <= bias)
 176                     t = PUNYCODE_TMIN;
 177                   else if (k >= bias + PUNYCODE_TMAX)
 178                     t = PUNYCODE_TMAX;
 179                   else
 180                     t = k - bias;
 181                   if (q < t)
 182                     break;
 183                   digit = t + (q - t) % (PUNYCODE_BASE - t);
 184                   g_string_append_c (output, encode_digit (digit));
 185                   q = (q - t) / (PUNYCODE_BASE - t);
 186                 }
 187
 188               g_string_append_c (output, encode_digit (q));
 189               bias = adapt (delta, handled_chars + 1, handled_chars == num_basic_chars);
 190               delta = 0;
 191               handled_chars++;
 192             }
 193         }
 194
 195       delta++;
 196       n++;
 197     }
 198
 199   success = TRUE;
 200
 201  fail:
 202   g_free (input);
 203   return success;
 204 }
 205
 206 /* From RFC 3454, Table B.1 */
 207 #define idna_is_junk(ch) ((ch) == 0x00AD || (ch) == 0x1806 || (ch) == 0x200B || (ch) == 0x2060 || (ch) == 0xFEFF || (ch) == 0x034F || (ch) == 0x180B || (ch) == 0x180C || (ch) == 0x180D || (ch) == 0x200C || (ch) == 0x200D || ((ch) >= 0xFE00 && (ch) <= 0xFE0F))
 208
 209 /* Scan @str for "junk" and return a cleaned-up string if any junk
 210  * is found. Else return %NULL.
 211  */
 212 static gchar *
 213 remove_junk (const gchar *str,
 214              gint         len)
 215 {
 216   GString *cleaned = NULL;
 217   const gchar *p;
 218   gunichar ch;
 219
 220   for (p = str; len == -1 ? *p : p < str + len; p = g_utf8_next_char (p))
 221     {
 222       ch = g_utf8_get_char (p);
 223       if (idna_is_junk (ch))
 224         {
 225           if (!cleaned)
 226             {
 227               cleaned = g_string_new (NULL);
 228               g_string_append_len (cleaned, str, p - str);
 229             }
 230         }
 231       else if (cleaned)
 232         g_string_append_unichar (cleaned, ch);
 233     }
 234
 235   if (cleaned)
 236     return g_string_free (cleaned, FALSE);
 237   else
 238     return NULL;
 239 }
 240
 241 static inline gboolean
 242 contains_uppercase_letters (const gchar *str,
 243                             gint         len)
 244 {
 245   const gchar *p;
 246
 247   for (p = str; len == -1 ? *p : p < str + len; p = g_utf8_next_char (p))
 248     {
 249       if (g_unichar_isupper (g_utf8_get_char (p)))
 250         return TRUE;
 251     }
 252   return FALSE;
 253 }
 254
 255 static inline gboolean
 256 contains_non_ascii (const gchar *str,
 257                     gint         len)
 258 {
 259   const gchar *p;
 260
 261   for (p = str; len == -1 ? *p : p < str + len; p++)
 262     {
 263       if ((guchar)*p > 0x80)
 264         return TRUE;
 265     }
 266   return FALSE;
 267 }
 268
 269 /* RFC 3454, Appendix C. ish. */
 270 static inline gboolean
 271 idna_is_prohibited (gunichar ch)
 272 {
 273   switch (g_unichar_type (ch))
 274     {
 275     case G_UNICODE_CONTROL:
 276     case G_UNICODE_FORMAT:
 277     case G_UNICODE_UNASSIGNED:
 278     case G_UNICODE_PRIVATE_USE:
 279     case G_UNICODE_SURROGATE:
 280     case G_UNICODE_LINE_SEPARATOR:
 281     case G_UNICODE_PARAGRAPH_SEPARATOR:
 282     case G_UNICODE_SPACE_SEPARATOR:
 283       return TRUE;
 284
 285     case G_UNICODE_OTHER_SYMBOL:
 286       if (ch == 0xFFFC || ch == 0xFFFD ||
 287           (ch >= 0x2FF0 && ch <= 0x2FFB))
 288         return TRUE;
 289       return FALSE;
 290
 291     case G_UNICODE_NON_SPACING_MARK:
 292       if (ch == 0x0340 || ch == 0x0341)
 293         return TRUE;
 294       return FALSE;
 295
 296     default:
 297       return FALSE;
 298     }
 299 }
 300
 301 /* RFC 3491 IDN cleanup algorithm. */
 302 static gchar *
 303 nameprep (const gchar *hostname,
 304           gint         len,
 305           gboolean    *is_unicode)
 306 {
 307   gchar *name, *tmp = NULL, *p;
 308
 309   /* It would be nice if we could do this without repeatedly
 310    * allocating strings and converting back and forth between
 311    * gunichars and UTF-8... The code does at least avoid doing most of
 312    * the sub-operations when they would just be equivalent to a
 313    * g_strdup().
 314    */
 315
 316   /* Remove presentation-only characters */
 317   name = remove_junk (hostname, len);
 318   if (name)
 319     {
 320       tmp = name;
 321       len = -1;
 322     }
 323   else
 324     name = (gchar *)hostname;
 325
 326   /* Convert to lowercase */
 327   if (contains_uppercase_letters (name, len))
 328     {
 329       name = g_utf8_strdown (name, len);
 330       g_free (tmp);
 331       tmp = name;
 332       len = -1;
 333     }
 334
 335   /* If there are no UTF8 characters, we're done. */
 336   if (!contains_non_ascii (name, len))
 337     {
 338       *is_unicode = FALSE;
 339       if (name == (gchar *)hostname)
 340         return len == -1 ? g_strdup (hostname) : g_strndup (hostname, len);
 341       else
 342         return name;
 343     }
 344
 345   *is_unicode = TRUE;
 346
 347   /* Normalize */
 348   name = g_utf8_normalize (name, len, G_NORMALIZE_NFKC);
 349   g_free (tmp);
 350   tmp = name;
 351
 352   if (!name)
 353     return NULL;
 354
 355   /* KC normalization may have created more capital letters (eg,
 356    * angstrom -> capital A with ring). So we have to lowercasify a
 357    * second time. (This is more-or-less how the nameprep algorithm
 358    * does it. If tolower(nfkc(tolower(X))) is guaranteed to be the
 359    * same as tolower(nfkc(X)), then we could skip the first tolower,
 360    * but I'm not sure it is.)
 361    */
 362   if (contains_uppercase_letters (name, -1))
 363     {
 364       name = g_utf8_strdown (name, -1);
 365       g_free (tmp);
 366       tmp = name;
 367     }
 368
 369   /* Check for prohibited characters */
 370   for (p = name; *p; p = g_utf8_next_char (p))
 371     {
 372       if (idna_is_prohibited (g_utf8_get_char (p)))
 373         {
 374           name = NULL;
 375           g_free (tmp);
 376           goto done;
 377         }
 378     }
 379
 380   /* FIXME: We're supposed to verify certain constraints on bidi
 381    * characters, but glib does not appear to have that information.
 382    */
 383
 384  done:
 385   return name;
 386 }
 387
 388 /* RFC 3490, section 3.1 says '.', 0x3002, 0xFF0E, and 0xFF61 count as
 389  * label-separating dots. @str must be '\0'-terminated.
 390  */
 391 #define idna_is_dot(str) ( \
 392   ((guchar)(str)[0] == '.') ||                                                 \
 393   ((guchar)(str)[0] == 0xE3 && (guchar)(str)[1] == 0x80 && (guchar)(str)[2] == 0x82) || \
 394   ((guchar)(str)[0] == 0xEF && (guchar)(str)[1] == 0xBC && (guchar)(str)[2] == 0x8E) || \
 395   ((guchar)(str)[0] == 0xEF && (guchar)(str)[1] == 0xBD && (guchar)(str)[2] == 0xA1) )
 396
 397 static const gchar *
 398 idna_end_of_label (const gchar *str)
 399 {
 400   for (; *str; str = g_utf8_next_char (str))
 401     {
 402       if (idna_is_dot (str))
 403         return str;
 404     }
 405   return str;
 406 }
 407
 408 /**
 409  * g_hostname_to_ascii:
 410  * @hostname: a valid UTF-8 or ASCII hostname
 411  *
 412  * Converts @hostname to its canonical ASCII form; an ASCII-only
 413  * string containing no uppercase letters and not ending with a
 414  * trailing dot.
 415  *
 416  * Returns: an ASCII hostname, which must be freed, or %NULL if
 417  * @hostname is in some way invalid.
 418  *
 419  * Since: 2.22
 420  **/
 421 gchar *
 422 g_hostname_to_ascii (const gchar *hostname)
 423 {
 424   gchar *name, *label, *p;
 425   GString *out;
 426   gssize llen, oldlen;
 427   gboolean unicode;
 428
 429   label = name = nameprep (hostname, -1, &unicode);
 430   if (!name || !unicode)
 431     return name;
 432
 433   out = g_string_new (NULL);
 434
 435   do
 436     {
 437       unicode = FALSE;
 438       for (p = label; *p && !idna_is_dot (p); p++)
 439         {
 440           if ((guchar)*p > 0x80)
 441             unicode = TRUE;
 442         }
 443
 444       oldlen = out->len;
 445       llen = p - label;
 446       if (unicode)
 447         {
 448           if (!strncmp (label, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
 449             goto fail;
 450
 451           g_string_append (out, IDNA_ACE_PREFIX);
 452           if (!punycode_encode (label, llen, out))
 453             goto fail;
 454         }
 455       else
 456         g_string_append_len (out, label, llen);
 457
 458       if (out->len - oldlen > 63)
 459         goto fail;
 460
 461       label += llen;
 462       if (*label)
 463         label = g_utf8_next_char (label);
 464       if (*label)
 465         g_string_append_c (out, '.');
 466     }
 467   while (*label);
 468
 469   g_free (name);
 470   return g_string_free (out, FALSE);
 471
 472  fail:
 473   g_free (name);
 474   g_string_free (out, TRUE);
 475   return NULL;
 476 }
 477
 478 /**
 479  * g_hostname_is_non_ascii:
 480  * @hostname: a hostname
 481  *
 482  * Tests if @hostname contains Unicode characters. If this returns
 483  * %TRUE, you need to encode the hostname with g_hostname_to_ascii()
 484  * before using it in non-IDN-aware contexts.
 485  *
 486  * Note that a hostname might contain a mix of encoded and unencoded
 487  * segments, and so it is possible for g_hostname_is_non_ascii() and
 488  * g_hostname_is_ascii_encoded() to both return %TRUE for a name.
 489  *
 490  * Returns: %TRUE if @hostname contains any non-ASCII characters
 491  *
 492  * Since: 2.22
 493  **/
 494 gboolean
 495 g_hostname_is_non_ascii (const gchar *hostname)
 496 {
 497   return contains_non_ascii (hostname, -1);
 498 }
 499
 500 /* Punycode decoder, RFC 3492 section 6.2. As with punycode_encode(),
 501  * read the RFC if you want to understand what this is actually doing.
 502  */
 503 static gboolean
 504 punycode_decode (const gchar *input,
 505                  gsize        input_length,
 506                  GString     *output)
 507 {
 508   GArray *output_chars;
 509   gunichar n;
 510   guint i, bias;
 511   guint oldi, w, k, digit, t;
 512   const gchar *split;
 513
 514   n = PUNYCODE_INITIAL_N;
 515   i = 0;
 516   bias = PUNYCODE_INITIAL_BIAS;
 517
 518   split = input + input_length - 1;
 519   while (split > input && *split != '-')
 520     split--;
 521   if (split > input)
 522     {
 523       output_chars = g_array_sized_new (FALSE, FALSE, sizeof (gunichar),
 524                                         split - input);
 525       input_length -= (split - input) + 1;
 526       while (input < split)
 527         {
 528           gunichar ch = (gunichar)*input++;
 529           if (!PUNYCODE_IS_BASIC (ch))
 530             goto fail;
 531           g_array_append_val (output_chars, ch);
 532         }
 533       input++;
 534     }
 535   else
 536     output_chars = g_array_new (FALSE, FALSE, sizeof (gunichar));
 537
 538   while (input_length)
 539     {
 540       oldi = i;
 541       w = 1;
 542       for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
 543         {
 544           if (!input_length--)
 545             goto fail;
 546           digit = decode_digit (*input++);
 547           if (digit >= PUNYCODE_BASE)
 548             goto fail;
 549           if (digit > (G_MAXUINT - i) / w)
 550             goto fail;
 551           i += digit * w;
 552           if (k <= bias)
 553             t = PUNYCODE_TMIN;
 554           else if (k >= bias + PUNYCODE_TMAX)
 555             t = PUNYCODE_TMAX;
 556           else
 557             t = k - bias;
 558           if (digit < t)
 559             break;
 560           if (w > G_MAXUINT / (PUNYCODE_BASE - t))
 561             goto fail;
 562           w *= (PUNYCODE_BASE - t);
 563         }
 564
 565       bias = adapt (i - oldi, output_chars->len + 1, oldi == 0);
 566
 567       if (i / (output_chars->len + 1) > G_MAXUINT - n)
 568         goto fail;
 569       n += i / (output_chars->len + 1);
 570       i %= (output_chars->len + 1);
 571
 572       g_array_insert_val (output_chars, i++, n);
 573     }
 574
 575   for (i = 0; i < output_chars->len; i++)
 576     g_string_append_unichar (output, g_array_index (output_chars, gunichar, i));
 577   g_array_free (output_chars, TRUE);
 578   return TRUE;
 579
 580  fail:
 581   g_array_free (output_chars, TRUE);
 582   return FALSE;
 583 }
 584
 585 /**
 586  * g_hostname_to_unicode:
 587  * @hostname: a valid UTF-8 or ASCII hostname
 588  *
 589  * Converts @hostname to its canonical presentation form; a UTF-8
 590  * string in Unicode normalization form C, containing no uppercase
 591  * letters, no forbidden characters, and no ASCII-encoded segments,
 592  * and not ending with a trailing dot.
 593  *
 594  * Of course if @hostname is not an internationalized hostname, then
 595  * the canonical presentation form will be entirely ASCII.
 596  *
 597  * Returns: a UTF-8 hostname, which must be freed, or %NULL if
 598  * @hostname is in some way invalid.
 599  *
 600  * Since: 2.22
 601  **/
 602 gchar *
 603 g_hostname_to_unicode (const gchar *hostname)
 604 {
 605   GString *out;
 606   gssize llen;
 607
 608   out = g_string_new (NULL);
 609
 610   do
 611     {
 612       llen = idna_end_of_label (hostname) - hostname;
 613       if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
 614         {
 615           hostname += IDNA_ACE_PREFIX_LEN;
 616           llen -= IDNA_ACE_PREFIX_LEN;
 617           if (!punycode_decode (hostname, llen, out))
 618             {
 619               g_string_free (out, TRUE);
 620               return NULL;
 621             }
 622         }
 623       else
 624         {
 625           gboolean unicode;
 626           gchar *canonicalized = nameprep (hostname, llen, &unicode);
 627
 628           if (!canonicalized)
 629             {
 630               g_string_free (out, TRUE);
 631               return NULL;
 632             }
 633           g_string_append (out, canonicalized);
 634           g_free (canonicalized);
 635         }
 636
 637       hostname += llen;
 638       if (*hostname)
 639         hostname = g_utf8_next_char (hostname);
 640       if (*hostname)
 641         g_string_append_c (out, '.');
 642     }
 643   while (*hostname);
 644
 645   return g_string_free (out, FALSE);
 646 }
 647
 648 /**
 649  * g_hostname_is_ascii_encoded:
 650  * @hostname: a hostname
 651  *
 652  * Tests if @hostname contains segments with an ASCII-compatible
 653  * encoding of an Internationalized Domain Name. If this returns
 654  * %TRUE, you should decode the hostname with g_hostname_to_unicode()
 655  * before displaying it to the user.
 656  *
 657  * Note that a hostname might contain a mix of encoded and unencoded
 658  * segments, and so it is possible for g_hostname_is_non_ascii() and
 659  * g_hostname_is_ascii_encoded() to both return %TRUE for a name.
 660  *
 661  * Returns: %TRUE if @hostname contains any ASCII-encoded
 662  * segments.
 663  *
 664  * Since: 2.22
 665  **/
 666 gboolean
 667 g_hostname_is_ascii_encoded (const gchar *hostname)
 668 {
 669   while (1)
 670     {
 671       if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
 672         return TRUE;
 673       hostname = idna_end_of_label (hostname);
 674       if (*hostname)
 675         hostname = g_utf8_next_char (hostname);
 676       if (!*hostname)
 677         return FALSE;
 678     }
 679 }
 680
 681 /**
 682  * g_hostname_is_ip_address:
 683  * @hostname: a hostname (or IP address in string form)
 684  *
 685  * Tests if @hostname is the string form of an IPv4 or IPv6 address.
 686  * (Eg, "192.168.0.1".)
 687  *
 688  * Returns: %TRUE if @hostname is an IP address
 689  *
 690  * Since: 2.22
 691  **/
 692 gboolean
 693 g_hostname_is_ip_address (const gchar *hostname)
 694 {
 695   gchar *p, *end;
 696   gint nsegments, octet;
 697
 698   /* On Linux we could implement this using inet_pton, but the Windows
 699    * equivalent of that requires linking against winsock, so we just
 700    * figure this out ourselves. Tested by tests/hostutils.c.
 701    */
 702
 703   p = (char *)hostname;
 704
 705   if (strchr (p, ':'))
 706     {
 707       gboolean skipped;
 708
 709       /* If it contains a ':', it's an IPv6 address (assuming it's an
 710        * IP address at all). This consists of eight ':'-separated
 711        * segments, each containing a 1-4 digit hex number, except that
 712        * optionally: (a) the last two segments can be replaced by an
 713        * IPv4 address, and (b) a single span of 1 to 8 "0000" segments
 714        * can be replaced with just "::".
 715        */
 716
 717       nsegments = 0;
 718       skipped = FALSE;
 719       while (*p && nsegments < 8)
 720         {
 721           /* Each segment after the first must be preceded by a ':'.
 722            * (We also handle half of the "string starts with ::" case
 723            * here.)
 724            */
 725           if (p != (char *)hostname || (p[0] == ':' && p[1] == ':'))
 726             {
 727               if (*p != ':')
 728                 return FALSE;
 729               p++;
 730             }
 731
 732           /* If there's another ':', it means we're skipping some segments */
 733           if (*p == ':' && !skipped)
 734             {
 735               skipped = TRUE;
 736               nsegments++;
 737
 738               /* Handle the "string ends with ::" case */
 739               if (!p[1])
 740                 p++;
 741
 742               continue;
 743             }
 744
 745           /* Read the segment, make sure it's valid. */
 746           for (end = p; g_ascii_isxdigit (*end); end++)
 747             ;
 748           if (end == p || end > p + 4)
 749             return FALSE;
 750
 751           if (*end == '.')
 752             {
 753               if ((nsegments == 6 && !skipped) || (nsegments <= 6 && skipped))
 754                 goto parse_ipv4;
 755               else
 756                 return FALSE;
 757             }
 758
 759           nsegments++;
 760           p = end;
 761         }
 762
 763       return !*p && (nsegments == 8 || skipped);
 764     }
 765
 766  parse_ipv4:
 767
 768   /* Parse IPv4: N.N.N.N, where each N <= 255 and doesn't have leading 0s. */
 769   for (nsegments = 0; nsegments < 4; nsegments++)
 770     {
 771       if (nsegments != 0)
 772         {
 773           if (*p != '.')
 774             return FALSE;
 775           p++;
 776         }
 777
 778       /* Check the segment; a little tricker than the IPv6 case since
 779        * we can't allow extra leading 0s, and we can't assume that all
 780        * strings of valid length are within range.
 781        */
 782       octet = 0;
 783       if (*p == '0')
 784         end = p + 1;
 785       else
 786         {
 787           for (end = p; g_ascii_isdigit (*end); end++)
 788             {
 789               octet = 10 * octet + (*end - '0');
 790
 791               if (octet > 255)
 792                 break;
 793             }
 794         }
 795       if (end == p || end > p + 3 || octet > 255)
 796         return FALSE;
 797
 798       p = end;
 799     }
 800
 801   /* If there's nothing left to parse, then it's ok. */
 802   return !*p;
 803 }