glib/gmarkup.c

   1 /* gmarkup.c - Simple XML-like parser
   2  *
   3  *  Copyright 2000, 2003 Red Hat, Inc.
   4  *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
   5  *
   6  * GLib is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU Lesser General Public License as
   8  * published by the Free Software Foundation; either version 2 of the
   9  * License, or (at your option) any later version.
  10  *
  11  * GLib is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with GLib; see the file COPYING.LIB.  If not,
  18  * see <http://www.gnu.org/licenses/>.
  19  */
  20
  21 #include "config.h"
  22
  23 #include <stdarg.h>
  24 #include <string.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <errno.h>
  28
  29 #include "gmarkup.h"
  30
  31 #include "gatomic.h"
  32 #include "gslice.h"
  33 #include "galloca.h"
  34 #include "gstrfuncs.h"
  35 #include "gstring.h"
  36 #include "gtestutils.h"
  37 #include "glibintl.h"
  38 #include "gthread.h"
  39
  40 /**
  41  * SECTION:markup
  42  * @Title: Simple XML Subset Parser
  43  * @Short_description: parses a subset of XML
  44  * @See_also: [XML Specification](http://www.w3.org/TR/REC-xml/)
  45  *
  46  * The "GMarkup" parser is intended to parse a simple markup format
  47  * that's a subset of XML. This is a small, efficient, easy-to-use
  48  * parser. It should not be used if you expect to interoperate with
  49  * other applications generating full-scale XML. However, it's very
  50  * useful for application data files, config files, etc. where you
  51  * know your application will be the only one writing the file.
  52  * Full-scale XML parsers should be able to parse the subset used by
  53  * GMarkup, so you can easily migrate to full-scale XML at a later
  54  * time if the need arises.
  55  *
  56  * GMarkup is not guaranteed to signal an error on all invalid XML;
  57  * the parser may accept documents that an XML parser would not.
  58  * However, XML documents which are not well-formed are not
  59  * considered valid GMarkup documents. (Being well-formed is a
  60  * weaker condition than being valid; see the
  61  * [XML specification](http://www.w3.org/TR/REC-xml/)
  62  * for definitions of these terms.)
  63  *
  64  * Simplifications to XML include:
  65  *
  66  * - Only UTF-8 encoding is allowed
  67  *
  68  * - No user-defined entities
  69  *
  70  * - Processing instructions, comments and the doctype declaration
  71  *   are "passed through" but are not interpreted in any way
  72  *
  73  * - No DTD or validation
  74  *
  75  * The markup format does support:
  76  *
  77  * - Elements
  78  *
  79  * - Attributes
  80  *
  81  * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
  82  *
  83  * - Character references
  84  *
  85  * - Sections marked as CDATA
  86  */
  87
  88 G_DEFINE_QUARK (g-markup-error-quark, g_markup_error) /* expands to the definition of g_markup_error_quark() */
  89
  90 typedef enum /* parser states; the current one is kept in GMarkupParseContext.state */
  91 {
  92   STATE_START, /* state assigned by g_markup_parse_context_new() */
  93   STATE_AFTER_OPEN_ANGLE,
  94   STATE_AFTER_CLOSE_ANGLE,
  95   STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  96   STATE_INSIDE_OPEN_TAG_NAME,
  97   STATE_INSIDE_ATTRIBUTE_NAME,
  98   STATE_AFTER_ATTRIBUTE_NAME,
  99   STATE_BETWEEN_ATTRIBUTES,
 100   STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
 101   STATE_INSIDE_ATTRIBUTE_VALUE_SQ, /* presumably single-quoted; unescape_gstring_inplace() normalizes whitespace in this state */
 102   STATE_INSIDE_ATTRIBUTE_VALUE_DQ, /* presumably double-quoted; unescape_gstring_inplace() normalizes whitespace in this state */
 103   STATE_INSIDE_TEXT,
 104   STATE_AFTER_CLOSE_TAG_SLASH,
 105   STATE_INSIDE_CLOSE_TAG_NAME,
 106   STATE_AFTER_CLOSE_TAG_NAME,
 107   STATE_INSIDE_PASSTHROUGH,
 108   STATE_ERROR /* set by mark_error() */
 109 } GMarkupParseState;
 110
 111 typedef struct /* one saved parser setup; elements of GMarkupParseContext.subparser_stack */
 112 {
 113   const char *prev_element; /* copied back into context->subparser_element by pop_subparser_stack() */
 114   const GMarkupParser *prev_parser; /* copied back into context->parser by pop_subparser_stack() */
 115   gpointer prev_user_data; /* copied back into context->user_data by pop_subparser_stack() */
 116 } GMarkupRecursionTracker;
 117
 118 struct _GMarkupParseContext
 119 {
 120   const GMarkupParser *parser; /* callbacks currently in use; restored from the tracker by pop_subparser_stack() */
 121
 122   volatile gint ref_count; /* starts at 1; changed atomically by the ref/unref functions */
 123
 124   GMarkupParseFlags flags; /* e.g. G_MARKUP_PREFIX_ERROR_POSITION, G_MARKUP_IGNORE_QUALIFIED */
 125
 126   gint line_number; /* starts at 1; bumped by advance_char() on '\n' */
 127   gint char_number; /* starts at 1; reset to 1 by advance_char() on '\n' */
 128
 129   GMarkupParseState state;
 130
 131   gpointer user_data; /* passed to the parser callbacks */
 132   GDestroyNotify dnotify; /* if set, called on user_data by g_markup_parse_context_free() */
 133
 134   /* A piece of character data or an element that
 135    * hasn't "ended" yet so we haven't yet called
 136    * the callback for it.
 137    */
 138   GString *partial_chunk;
 139   GSList *spare_chunks; /* blanked GStrings kept for reuse, see release_chunk() / add_to_partial() */
 140
 141   GSList *tag_stack; /* element names (the ->str of the GStrings in tag_stack_gstr), innermost first */
 142   GSList *tag_stack_gstr; /* the GStrings owning the names in tag_stack, same order */
 143   GSList *spare_list_nodes; /* detached GSList nodes kept for reuse, see get_list_node() */
 144
 145   GString **attr_names; /* NULL-terminated array, grown by add_attribute() */
 146   GString **attr_values; /* parallel to attr_names */
 147   gint cur_attr; /* index of the most recently added attribute, -1 when there is none */
 148   gint alloc_attrs; /* number of slots allocated in attr_names / attr_values */
 149
 150   const gchar *current_text; /* initialised to NULL */
 151   gssize       current_text_len; /* initialised to -1 */
 152   const gchar *current_text_end; /* advance_char() returns FALSE once iter reaches this */
 153
 154   /* used to save the start of the last interesting thingy */
 155   const gchar *start;
 156
 157   const gchar *iter; /* current position, moved forward by advance_char() */
 158
 159   guint document_empty : 1; /* initialised to TRUE */
 160   guint parsing : 1; /* must be FALSE when g_markup_parse_context_free() is called */
 161   guint awaiting_pop : 1; /* set by pop_subparser_stack(), cleared by ensure_no_outstanding_subparser() */
 162   gint balance; /* initialised to 0; NOTE(review): not otherwise used in the visible part of the file */
 163
 164   /* subparser support */
 165   GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
 166   const char *subparser_element; /* compared by pointer with the current element in possibly_finish_subparser() */
 167   gpointer held_user_data; /* user_data of the subparser just popped, saved by pop_subparser_stack() */
 168 };
 169
 170 /*
 171  * Helpers to reduce our allocation overhead, we have
 172  * a well defined allocation lifecycle.
 173  */
 174 static GSList *
 175 get_list_node (GMarkupParseContext *context, gpointer data)
 176 {
 177   GSList *node;
 178   if (context->spare_list_nodes != NULL)
 179     {
 180       node = context->spare_list_nodes;
 181       context->spare_list_nodes = g_slist_remove_link (context->spare_list_nodes, node);
 182     }
 183   else
 184     node = g_slist_alloc();
 185   node->data = data;
 186   return node;
 187 }
 188
 189 static void
 190 free_list_node (GMarkupParseContext *context, GSList *node)
 191 {
 192   node->data = NULL;
 193   context->spare_list_nodes = g_slist_concat (node, context->spare_list_nodes);
 194 }
 195
 196 static inline void
 197 string_blank (GString *string)
 198 {
 199   string->str[0] = '\0';
 200   string->len = 0;
 201 }
 202
 203 /**
 204  * g_markup_parse_context_new:
 205  * @parser: a #GMarkupParser
 206  * @flags: one or more #GMarkupParseFlags
 207  * @user_data: user data to pass to #GMarkupParser functions
 208  * @user_data_dnotify: user data destroy notifier called when
 209  *     the parse context is freed
 210  *
 211  * Creates a new parse context. A parse context is used to parse
 212  * marked-up documents. You can feed any number of documents into
 213  * a context, as long as no errors occur; once an error occurs,
 214  * the parse context can't continue to parse text (you have to
 215  * free it and create a new parse context).
 216  *
 217  * Returns: a new #GMarkupParseContext
 218  **/
 219 GMarkupParseContext *
 220 g_markup_parse_context_new (const GMarkupParser *parser,
 221                             GMarkupParseFlags    flags,
 222                             gpointer             user_data,
 223                             GDestroyNotify       user_data_dnotify)
 224 {
 225   GMarkupParseContext *context;
 226
 227   g_return_val_if_fail (parser != NULL, NULL);
 228
 229   context = g_new (GMarkupParseContext, 1);
 230
 231   context->ref_count = 1;
 232   context->parser = parser;
 233   context->flags = flags;
 234   context->user_data = user_data;
 235   context->dnotify = user_data_dnotify;
 236
 237   context->line_number = 1;
 238   context->char_number = 1;
 239
 240   context->partial_chunk = NULL;
 241   context->spare_chunks = NULL;
 242   context->spare_list_nodes = NULL;
 243
 244   context->state = STATE_START;
 245   context->tag_stack = NULL;
 246   context->tag_stack_gstr = NULL;
 247   context->attr_names = NULL;
 248   context->attr_values = NULL;
 249   context->cur_attr = -1;
 250   context->alloc_attrs = 0;
 251
 252   context->current_text = NULL;
 253   context->current_text_len = -1;
 254   context->current_text_end = NULL;
 255
 256   context->start = NULL;
 257   context->iter = NULL;
 258
 259   context->document_empty = TRUE;
 260   context->parsing = FALSE;
 261
 262   context->awaiting_pop = FALSE;
 263   context->subparser_stack = NULL;
 264   context->subparser_element = NULL;
 265
 266   /* this is only looked at if awaiting_pop = TRUE.  initialise anyway. */
 267   context->held_user_data = NULL;
 268
 269   context->balance = 0;
 270
 271   return context;
 272 }
 273
 274 /**
 275  * g_markup_parse_context_ref:
 276  * @context: a #GMarkupParseContext
 277  *
 278  * Increases the reference count of @context.
 279  *
 280  * Returns: the same @context
 281  *
 282  * Since: 2.36
 283  **/
 284 GMarkupParseContext *
 285 g_markup_parse_context_ref (GMarkupParseContext *context)
 286 {
 287   g_return_val_if_fail (context != NULL, NULL);
 288   g_return_val_if_fail (context->ref_count > 0, NULL);
 289
 290   g_atomic_int_inc (&context->ref_count);
 291
 292   return context;
 293 }
 294
 295 /**
 296  * g_markup_parse_context_unref:
 297  * @context: a #GMarkupParseContext
 298  *
 299  * Decreases the reference count of @context.  When its reference count
 300  * drops to 0, it is freed.
 301  *
 302  * Since: 2.36
 303  **/
 304 void
 305 g_markup_parse_context_unref (GMarkupParseContext *context)
 306 {
 307   g_return_if_fail (context != NULL);
 308   g_return_if_fail (context->ref_count > 0);
 309
 310   if (g_atomic_int_dec_and_test (&context->ref_count))
 311     g_markup_parse_context_free (context);
 312 }
 313
 314 static void
 315 string_full_free (gpointer ptr)
 316 {
 317   g_string_free (ptr, TRUE);
 318 }
 319
 320 static void clear_attributes (GMarkupParseContext *context);
 321
 322 /**
 323  * g_markup_parse_context_free:
 324  * @context: a #GMarkupParseContext
 325  *
 326  * Frees a #GMarkupParseContext.
 327  *
 328  * This function can't be called from inside one of the
 329  * #GMarkupParser functions or while a subparser is pushed.
 330  */
 331 void
 332 g_markup_parse_context_free (GMarkupParseContext *context)
 333 {
 334   g_return_if_fail (context != NULL);
 335   g_return_if_fail (!context->parsing);
 336   g_return_if_fail (!context->subparser_stack);
 337   g_return_if_fail (!context->awaiting_pop);
 338
 339   if (context->dnotify)
 340     (* context->dnotify) (context->user_data);
 341
 342   clear_attributes (context);
 343   g_free (context->attr_names);
 344   g_free (context->attr_values);
 345
 346   g_slist_free_full (context->tag_stack_gstr, string_full_free);
 347   g_slist_free (context->tag_stack);
 348
 349   g_slist_free_full (context->spare_chunks, string_full_free);
 350   g_slist_free (context->spare_list_nodes);
 351
 352   if (context->partial_chunk)
 353     g_string_free (context->partial_chunk, TRUE);
 354
 355   g_free (context);
 356 }
 357
 358 static void pop_subparser_stack (GMarkupParseContext *context);
 359
 360 static void
 361 mark_error (GMarkupParseContext *context,
 362             GError              *error)
 363 {
 364   context->state = STATE_ERROR;
 365
 366   if (context->parser->error)
 367     (*context->parser->error) (context, error, context->user_data);
 368
 369   /* report the error all the way up to free all the user-data */
 370   while (context->subparser_stack)
 371     {
 372       pop_subparser_stack (context);
 373       context->awaiting_pop = FALSE; /* already been freed */
 374
 375       if (context->parser->error)
 376         (*context->parser->error) (context, error, context->user_data);
 377     }
 378 }
 379
 380 static void
 381 set_error (GMarkupParseContext  *context,
 382            GError              **error,
 383            GMarkupError          code,
 384            const gchar          *format,
 385            ...) G_GNUC_PRINTF (4, 5);
 386
 387 static void
 388 set_error_literal (GMarkupParseContext  *context,
 389                    GError              **error,
 390                    GMarkupError          code,
 391                    const gchar          *message)
 392 {
 393   GError *tmp_error;
 394
 395   tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, message);
 396
 397   g_prefix_error (&tmp_error,
 398                   _("Error on line %d char %d: "),
 399                   context->line_number,
 400                   context->char_number);
 401
 402   mark_error (context, tmp_error);
 403
 404   g_propagate_error (error, tmp_error);
 405 }
 406
 407 G_GNUC_PRINTF(4, 5)
 408 static void
 409 set_error (GMarkupParseContext  *context,
 410            GError              **error,
 411            GMarkupError          code,
 412            const gchar          *format,
 413            ...)
 414 {
 415   gchar *s;
 416   gchar *s_valid;
 417   va_list args;
 418
 419   va_start (args, format);
 420   s = g_strdup_vprintf (format, args);
 421   va_end (args);
 422
 423   /* Make sure that the GError message is valid UTF-8
 424    * even if it is complaining about invalid UTF-8 in the markup
 425    */
 426   s_valid = _g_utf8_make_valid (s);
 427   set_error_literal (context, error, code, s);
 428
 429   g_free (s);
 430   g_free (s_valid);
 431 }
 432
 433 static void
 434 propagate_error (GMarkupParseContext  *context,
 435                  GError              **dest,
 436                  GError               *src)
 437 {
 438   if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
 439     g_prefix_error (&src,
 440                     _("Error on line %d char %d: "),
 441                     context->line_number,
 442                     context->char_number);
 443
 444   mark_error (context, src);
 445
 446   g_propagate_error (dest, src);
 447 }
 448
 449 #define IS_COMMON_NAME_END_CHAR(c) \
 450   ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
 451
 452 static gboolean
 453 slow_name_validate (GMarkupParseContext  *context,
 454                     const gchar          *name,
 455                     GError              **error)
 456 {
 457   const gchar *p = name;
 458
 459   if (!g_utf8_validate (name, strlen (name), NULL))
 460     {
 461       set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
 462                  _("Invalid UTF-8 encoded text in name - not valid '%s'"), name);
 463       return FALSE;
 464     }
 465
 466   if (!(g_ascii_isalpha (*p) ||
 467         (!IS_COMMON_NAME_END_CHAR (*p) &&
 468          (*p == '_' ||
 469           *p == ':' ||
 470           g_unichar_isalpha (g_utf8_get_char (p))))))
 471     {
 472       set_error (context, error, G_MARKUP_ERROR_PARSE,
 473                  _("'%s' is not a valid name"), name);
 474       return FALSE;
 475     }
 476
 477   for (p = g_utf8_next_char (name); *p != '\0'; p = g_utf8_next_char (p))
 478     {
 479       /* is_name_char */
 480       if (!(g_ascii_isalnum (*p) ||
 481             (!IS_COMMON_NAME_END_CHAR (*p) &&
 482              (*p == '.' ||
 483               *p == '-' ||
 484               *p == '_' ||
 485               *p == ':' ||
 486               g_unichar_isalpha (g_utf8_get_char (p))))))
 487         {
 488           set_error (context, error, G_MARKUP_ERROR_PARSE,
 489                      _("'%s' is not a valid name: '%c'"), name, *p);
 490           return FALSE;
 491         }
 492     }
 493   return TRUE;
 494 }
 495
 496 /*
 497  * Use me for elements, attributes etc.
 498  */
 499 static gboolean
 500 name_validate (GMarkupParseContext  *context,
 501                const gchar          *name,
 502                GError              **error)
 503 {
 504   char mask;
 505   const char *p;
 506
 507   /* name start char */
 508   p = name;
 509   if (G_UNLIKELY (IS_COMMON_NAME_END_CHAR (*p) ||
 510                   !(g_ascii_isalpha (*p) || *p == '_' || *p == ':')))
 511     goto slow_validate;
 512
 513   for (mask = *p++; *p != '\0'; p++)
 514     {
 515       mask |= *p;
 516
 517       /* is_name_char */
 518       if (G_UNLIKELY (!(g_ascii_isalnum (*p) ||
 519                         (!IS_COMMON_NAME_END_CHAR (*p) &&
 520                          (*p == '.' ||
 521                           *p == '-' ||
 522                           *p == '_' ||
 523                           *p == ':')))))
 524         goto slow_validate;
 525     }
 526
 527   if (mask & 0x80) /* un-common / non-ascii */
 528     goto slow_validate;
 529
 530   return TRUE;
 531
 532  slow_validate:
 533   return slow_name_validate (context, name, error);
 534 }
 535
 536 static gboolean
 537 text_validate (GMarkupParseContext  *context,
 538                const gchar          *p,
 539                gint                  len,
 540                GError              **error)
 541 {
 542   if (!g_utf8_validate (p, len, NULL))
 543     {
 544       set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
 545                  _("Invalid UTF-8 encoded text in name - not valid '%s'"), p);
 546       return FALSE;
 547     }
 548   else
 549     return TRUE;
 550 }
 551
 552 static gchar*
 553 char_str (gunichar c,
 554           gchar   *buf)
 555 {
 556   memset (buf, 0, 8);
 557   g_unichar_to_utf8 (c, buf);
 558   return buf;
 559 }
 560
 561 static gchar*
 562 utf8_str (const gchar *utf8,
 563           gchar       *buf)
 564 {
 565   char_str (g_utf8_get_char (utf8), buf);
 566   return buf;
 567 }
 568
 569 G_GNUC_PRINTF(5, 6)
 570 static void
 571 set_unescape_error (GMarkupParseContext  *context,
 572                     GError              **error,
 573                     const gchar          *remaining_text,
 574                     GMarkupError          code,
 575                     const gchar          *format,
 576                     ...)
 577 {
 578   GError *tmp_error;
 579   gchar *s;
 580   va_list args;
 581   gint remaining_newlines;
 582   const gchar *p;
 583
 584   remaining_newlines = 0;
 585   p = remaining_text;
 586   while (*p != '\0')
 587     {
 588       if (*p == '\n')
 589         ++remaining_newlines;
 590       ++p;
 591     }
 592
 593   va_start (args, format);
 594   s = g_strdup_vprintf (format, args);
 595   va_end (args);
 596
 597   tmp_error = g_error_new (G_MARKUP_ERROR,
 598                            code,
 599                            _("Error on line %d: %s"),
 600                            context->line_number - remaining_newlines,
 601                            s);
 602
 603   g_free (s);
 604
 605   mark_error (context, tmp_error);
 606
 607   g_propagate_error (error, tmp_error);
 608 }
 609
 610 /*
 611  * re-write the GString in-place, unescaping anything that escaped.
 612  * most XML does not contain entities, or escaping.
      *
      * Handled are the entities &lt; &gt; &amp; &quot; &apos; and the
      * character references &#N; (decimal) and &#xN; (hexadecimal).
      * A '\r', or a "\r\n" pair, becomes a single '\n'.  When the
      * context is in one of the STATE_INSIDE_ATTRIBUTE_VALUE_* states
      * it becomes ' ' instead, and '\t' and '\n' become ' ' as well.
      *
      * @is_ascii is set to FALSE on entry.  On success it is set to TRUE
      * if no byte read from the string and no decoded character
      * reference had its high bit set / was >= 0x80.
      *
      * Returns TRUE on success.  On failure an error is reported with
      * set_unescape_error() and FALSE is returned; the part of @string
      * processed so far has already been overwritten at that point.
 613  */
 614 static gboolean
 615 unescape_gstring_inplace (GMarkupParseContext  *context,
 616                           GString              *string,
 617                           gboolean             *is_ascii,
 618                           GError              **error)
 619 {
 620   char mask, *to;
 621   const char *from;
 622   gboolean normalize_attribute;
 623
 624   *is_ascii = FALSE; /* this is the value seen by the caller on every error return */
 625
 626   /* are we unescaping an attribute or not ? */
 627   if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
 628       context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
 629     normalize_attribute = TRUE;
 630   else
 631     normalize_attribute = FALSE;
 632
 633   /*
 634    * Meeks' theorem: unescaping can only shrink text.
 635    * for &lt; etc. this is obvious, for &#xffff; more
 636    * thought is required, but this is patently so.
 637    */
 638   mask = 0;
 639   for (from = to = string->str; *from != '\0'; from++, to++) /* from = read position, to = write position in the same buffer */
 640     {
 641       *to = *from;
 642
 643       mask |= *to; /* OR of all bytes copied; its high bit is tested at the end */
 644       if (normalize_attribute && (*to == '\t' || *to == '\n'))
 645         *to = ' ';
 646       if (*to == '\r')
 647         {
 648           *to = normalize_attribute ? ' ' : '\n';
 649           if (from[1] == '\n')
 650             from++; /* skip the '\n' of a "\r\n" pair so the pair yields one character */
 651         }
 652       if (*from == '&')
 653         {
 654           from++; /* step past the '&' */
 655           if (*from == '#')
 656             {
 657               gint base = 10;
 658               gulong l;
 659               gchar *end = NULL;
 660
 661               from++;
 662
 663               if (*from == 'x')
 664                 {
 665                   base = 16;
 666                   from++;
 667                 }
 668
 669               errno = 0; /* cleared so that a failure of strtoul() can be detected below */
 670               l = strtoul (from, &end, base);
 671
 672               if (end == from || errno != 0)
 673                 {
 674                   set_unescape_error (context, error,
 675                                       from, G_MARKUP_ERROR_PARSE,
 676                                       _("Failed to parse '%-.*s', which "
 677                                         "should have been a digit "
 678                                         "inside a character reference "
 679                                         "(&#234; for example) - perhaps "
 680                                         "the digit is too large"),
 681                                       (int)(end - from), from);
 682                   return FALSE;
 683                 }
 684               else if (*end != ';')
 685                 {
 686                   set_unescape_error (context, error,
 687                                       from, G_MARKUP_ERROR_PARSE,
 688                                       _("Character reference did not end with a "
 689                                         "semicolon; "
 690                                         "most likely you used an ampersand "
 691                                         "character without intending to start "
 692                                         "an entity - escape ampersand as &amp;"));
 693                   return FALSE;
 694                 }
 695               else
 696                 {
 697                   /* characters XML 1.1 permits */
 698                   if ((0 < l && l <= 0xD7FF) ||
 699                       (0xE000 <= l && l <= 0xFFFD) ||
 700                       (0x10000 <= l && l <= 0x10FFFF))
 701                     {
 702                       gchar buf[8];
 703                       char_str (l, buf);
 704                       strcpy (to, buf);
 705                       to += strlen (buf) - 1; /* leave 'to' on the last byte written; the loop's to++ steps over it */
 706                       from = end; /* 'end' is the ';'; the loop's from++ steps past it */
 707                       if (l >= 0x80) /* not ascii */
 708                         mask |= 0x80;
 709                     }
 710                   else
 711                     {
 712                       set_unescape_error (context, error,
 713                                           from, G_MARKUP_ERROR_PARSE,
 714                                           _("Character reference '%-.*s' does not "
 715                                             "encode a permitted character"),
 716                                           (int)(end - from), from);
 717                       return FALSE;
 718                     }
 719                 }
 720             }
 721
 722           else if (strncmp (from, "lt;", 3) == 0)
 723             {
 724               *to = '<';
 725               from += 2; /* now on the ';' (same for the entities below); the loop's from++ steps past it */
 726             }
 727           else if (strncmp (from, "gt;", 3) == 0)
 728             {
 729               *to = '>';
 730               from += 2;
 731             }
 732           else if (strncmp (from, "amp;", 4) == 0)
 733             {
 734               *to = '&';
 735               from += 3;
 736             }
 737           else if (strncmp (from, "quot;", 5) == 0)
 738             {
 739               *to = '"';
 740               from += 4;
 741             }
 742           else if (strncmp (from, "apos;", 5) == 0)
 743             {
 744               *to = '\'';
 745               from += 4;
 746             }
 747           else
 748             {
 749               if (*from == ';')
 750                 set_unescape_error (context, error,
 751                                     from, G_MARKUP_ERROR_PARSE,
 752                                     _("Empty entity '&;' seen; valid "
 753                                       "entities are: &amp; &quot; &lt; &gt; &apos;"));
 754               else
 755                 {
 756                   const char *end = strchr (from, ';');
 757                   if (end)
 758                     set_unescape_error (context, error,
 759                                         from, G_MARKUP_ERROR_PARSE,
 760                                         _("Entity name '%-.*s' is not known"),
 761                                         (int)(end - from), from);
 762                   else
 763                     set_unescape_error (context, error,
 764                                         from, G_MARKUP_ERROR_PARSE,
 765                                         _("Entity did not end with a semicolon; "
 766                                           "most likely you used an ampersand "
 767                                           "character without intending to start "
 768                                           "an entity - escape ampersand as &amp;"));
 769                 }
 770               return FALSE;
 771             }
 772         }
 773     }
 774
 775   g_assert (to - string->str <= string->len);
 776   if (to - string->str != string->len)
 777     g_string_truncate (string, to - string->str); /* the text shrank: cut the string at the write position */
 778
 779   *is_ascii = !(mask & 0x80);
 780
 781   return TRUE;
 782 }
 783
 784 static inline gboolean
 785 advance_char (GMarkupParseContext *context)
 786 {
 787   context->iter++;
 788   context->char_number++;
 789
 790   if (G_UNLIKELY (context->iter == context->current_text_end))
 791       return FALSE;
 792
 793   else if (G_UNLIKELY (*context->iter == '\n'))
 794     {
 795       context->line_number++;
 796       context->char_number = 1;
 797     }
 798
 799   return TRUE;
 800 }
 801
 802 static inline gboolean
 803 xml_isspace (char c)
 804 {
 805   return c == ' ' || c == '\t' || c == '\n' || c == '\r';
 806 }
 807
 808 static void
 809 skip_spaces (GMarkupParseContext *context)
 810 {
 811   do
 812     {
 813       if (!xml_isspace (*context->iter))
 814         return;
 815     }
 816   while (advance_char (context));
 817 }
 818
 819 static void
 820 advance_to_name_end (GMarkupParseContext *context)
 821 {
 822   do
 823     {
 824       if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
 825         return;
 826       if (xml_isspace (*(context->iter)))
 827         return;
 828     }
 829   while (advance_char (context));
 830 }
 831
 832 static void
 833 release_chunk (GMarkupParseContext *context, GString *str)
 834 {
 835   GSList *node;
 836   if (!str)
 837     return;
 838   if (str->allocated_len > 256)
 839     { /* large strings are unusual and worth freeing */
 840       g_string_free (str, TRUE);
 841       return;
 842     }
 843   string_blank (str);
 844   node = get_list_node (context, str);
 845   context->spare_chunks = g_slist_concat (node, context->spare_chunks);
 846 }
 847
 848 static void
 849 add_to_partial (GMarkupParseContext *context,
 850                 const gchar         *text_start,
 851                 const gchar         *text_end)
 852 {
 853   if (context->partial_chunk == NULL)
 854     { /* allocate a new chunk to parse into */
 855
 856       if (context->spare_chunks != NULL)
 857         {
 858           GSList *node = context->spare_chunks;
 859           context->spare_chunks = g_slist_remove_link (context->spare_chunks, node);
 860           context->partial_chunk = node->data;
 861           free_list_node (context, node);
 862         }
 863       else
 864         context->partial_chunk = g_string_sized_new (MAX (28, text_end - text_start));
 865     }
 866
 867   if (text_start != text_end)
 868     g_string_insert_len (context->partial_chunk, -1,
 869                          text_start, text_end - text_start);
 870 }
 871
 872 static inline void
 873 truncate_partial (GMarkupParseContext *context)
 874 {
 875   if (context->partial_chunk != NULL)
 876     string_blank (context->partial_chunk);
 877 }
 878
 879 static inline const gchar*
 880 current_element (GMarkupParseContext *context)
 881 {
 882   return context->tag_stack->data;
 883 }
 884
 885 static void
 886 pop_subparser_stack (GMarkupParseContext *context)
 887 {
 888   GMarkupRecursionTracker *tracker;
 889
 890   g_assert (context->subparser_stack);
 891
 892   tracker = context->subparser_stack->data;
 893
 894   context->awaiting_pop = TRUE;
 895   context->held_user_data = context->user_data;
 896
 897   context->user_data = tracker->prev_user_data;
 898   context->parser = tracker->prev_parser;
 899   context->subparser_element = tracker->prev_element;
 900   g_slice_free (GMarkupRecursionTracker, tracker);
 901
 902   context->subparser_stack = g_slist_delete_link (context->subparser_stack,
 903                                                   context->subparser_stack);
 904 }
 905
 906 static void
 907 push_partial_as_tag (GMarkupParseContext *context)
 908 {
 909   GString *str = context->partial_chunk;
 910   /* sadly, this is exported by gmarkup_get_element_stack as-is */
 911   context->tag_stack = g_slist_concat (get_list_node (context, str->str), context->tag_stack);
 912   context->tag_stack_gstr = g_slist_concat (get_list_node (context, str), context->tag_stack_gstr);
 913   context->partial_chunk = NULL;
 914 }
 915
 916 static void
 917 pop_tag (GMarkupParseContext *context)
 918 {
 919   GSList *nodea, *nodeb;
 920
 921   nodea = context->tag_stack;
 922   nodeb = context->tag_stack_gstr;
 923   release_chunk (context, nodeb->data);
 924   context->tag_stack = g_slist_remove_link (context->tag_stack, nodea);
 925   context->tag_stack_gstr = g_slist_remove_link (context->tag_stack_gstr, nodeb);
 926   free_list_node (context, nodea);
 927   free_list_node (context, nodeb);
 928 }
 929
 930 static void
 931 possibly_finish_subparser (GMarkupParseContext *context)
 932 {
 933   if (current_element (context) == context->subparser_element)
 934     pop_subparser_stack (context);
 935 }
 936
 937 static void
 938 ensure_no_outstanding_subparser (GMarkupParseContext *context)
 939 {
 940   if (context->awaiting_pop)
 941     g_critical ("During the first end_element call after invoking a "
 942                 "subparser you must pop the subparser stack and handle "
 943                 "the freeing of the subparser user_data.  This can be "
 944                 "done by calling the end function of the subparser.  "
 945                 "Very probably, your program just leaked memory.");
 946
 947   /* let valgrind watch the pointer disappear... */
 948   context->held_user_data = NULL;
 949   context->awaiting_pop = FALSE;
 950 }
 951
 952 static const gchar*
 953 current_attribute (GMarkupParseContext *context)
 954 {
 955   g_assert (context->cur_attr >= 0);
 956   return context->attr_names[context->cur_attr]->str;
 957 }
 958
 959 static void
 960 add_attribute (GMarkupParseContext *context, GString *str)
 961 {
 962   if (context->cur_attr + 2 >= context->alloc_attrs)
 963     {
 964       context->alloc_attrs += 5; /* silly magic number */
 965       context->attr_names = g_realloc (context->attr_names, sizeof(GString*)*context->alloc_attrs);
 966       context->attr_values = g_realloc (context->attr_values, sizeof(GString*)*context->alloc_attrs);
 967     }
 968   context->cur_attr++;
 969   context->attr_names[context->cur_attr] = str;
 970   context->attr_values[context->cur_attr] = NULL;
 971   context->attr_names[context->cur_attr+1] = NULL;
 972   context->attr_values[context->cur_attr+1] = NULL;
 973 }
 974
 975 /* Release every pending attribute name/value chunk, leaving the list empty. */
 976 static void
 977 clear_attributes (GMarkupParseContext *context)
 978 {
 979   while (context->cur_attr >= 0)
 980     {
 981       GString **name = &context->attr_names[context->cur_attr];
 982       GString **value = &context->attr_values[context->cur_attr];
 983       release_chunk (context, *name);
 984       release_chunk (context, *value);
 985       *name = *value = NULL;
 986       context->cur_attr--;
 987     }
 988   g_assert (context->cur_attr == -1);
 989   g_assert (context->attr_names == NULL || context->attr_names[0] == NULL);
 990   g_assert (context->attr_values == NULL || context->attr_values[0] == NULL);
 991 }
 992
 993 /* Deliberately a separate function: the g_newa() arrays below are
 994  * only released when the enclosing function returns, so building
 995  * them inside the parse loop would keep growing the stack (and
 996  * eventually blow it) on large documents.
 997  */
 998 static inline void
 999 emit_start_element (GMarkupParseContext  *context,
1000                     GError              **error)
1001 {
1002   const gboolean skip_qualified = (context->flags & G_MARKUP_IGNORE_QUALIFIED) != 0;
1003   const int n_attrs = context->cur_attr + 1;
1004   const gchar **names, **values;
1005   const gchar *element_name;
1006   GError *callback_error = NULL;
1007   int src, dst = 0;
1008
1009   /* A qualified (colon-containing) tag is not reported when we are
1010    * asked to ignore those.  A parser without any callbacks is pushed
1011    * so that all tags inside of it are ignored; emit_end_element pops it.
1012    */
1013   if (skip_qualified && strchr (current_element (context), ':') != NULL)
1014     {
1015       static const GMarkupParser ignore_parser;
1016
1017       g_markup_parse_context_push (context, &ignore_parser, NULL);
1018       clear_attributes (context);
1019       return;
1020     }
1021
1022   /* Flatten the GString pairs into two NULL-terminated string arrays. */
1023   names = g_newa (const gchar *, n_attrs + 1);
1024   values = g_newa (const gchar *, n_attrs + 1);
1025   for (src = 0; src < n_attrs; src++)
1026     {
1027       const gchar *name = context->attr_names[src]->str;
1028
1029       /* qualified attribute names are left out under the same flag */
1030       if (!skip_qualified || strchr (name, ':') == NULL)
1031         {
1032           names[dst] = name;
1033           values[dst] = context->attr_values[src]->str;
1034           dst++;
1035         }
1036     }
1037   names[dst] = NULL;
1038   values[dst] = NULL;
1039
1040   element_name = current_element (context);
1041
1042   /* Report the element.  Note that its name is only validated when
1043    * a start_element callback is actually set. */
1044   if (context->parser->start_element != NULL &&
1045       name_validate (context, element_name, error))
1046     context->parser->start_element (context, element_name, names, values,
1047                                     context->user_data, &callback_error);
1048   clear_attributes (context);
1049
1050   if (callback_error != NULL)
1051     propagate_error (context, error, callback_error);
1052 }
1053
1054 /* Call the end_element callback for the innermost open element and
1055  * then drop that element via pop_tag().  This runs once a close tag,
1056  * or the "/>" of an empty-element tag, has been consumed.  An error
1057  * set by the callback is routed through propagate_error(), just as at
1058  * the start_element, text and passthrough call sites.
1059  */
1060 static void
1061 emit_end_element (GMarkupParseContext  *context,
1062                   GError              **error)
1063 {
1064   GError *tmp_error = NULL;
1065
1066   g_assert (context->tag_stack != NULL);
1067
1068   possibly_finish_subparser (context);
1069
1070   /* We might have just returned from our ignore subparser */
1071   if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
1072     {
1073       g_markup_parse_context_pop (context);
1074       pop_tag (context);
1075       return;
1076     }
1077
1078   if (context->parser->end_element)
1079     (* context->parser->end_element) (context,
1080                                       current_element (context),
1081                                       context->user_data,
1082                                       &tmp_error);
1083
1084   ensure_no_outstanding_subparser (context);
1085
1086   /* NOTE(review): propagate_error() presumably also applies the
1087    * G_MARKUP_PREFIX_ERROR_POSITION prefix -- confirm at its definition. */
1088   if (tmp_error != NULL)
1089     propagate_error (context, error, tmp_error);
1090
1091   pop_tag (context);
1092 }
1092
1093 /**
1094  * g_markup_parse_context_parse:
1095  * @context: a #GMarkupParseContext
1096  * @text: chunk of text to parse
1097  * @text_len: length of @text in bytes, or negative if @text is nul-terminated
1098  * @error: return location for a #GError
1099  *
1100  * Feed some data to the #GMarkupParseContext. An empty chunk is a no-op.
1101  *
1102  * The data need not be valid UTF-8; an error will be signaled if
1103  * it's invalid. The data need not be an entire document; you can
1104  * feed a document into the parser incrementally, via multiple calls
1105  * to this function. Typically, as you receive data from a network
1106  * connection or file, you feed each received chunk of data into this
1107  * function, aborting the process if an error occurs. Once an error
1108  * is reported, no further data may be fed to the #GMarkupParseContext;
1109  * all errors are fatal.
1110  *
1111  * Returns: %FALSE if an error occurred, %TRUE on success
1112  */
1113 gboolean
1114 g_markup_parse_context_parse (GMarkupParseContext  *context,
1115                               const gchar          *text,
1116                               gssize                text_len,
1117                               GError              **error)
1118 {
1119   g_return_val_if_fail (context != NULL, FALSE);
1120   g_return_val_if_fail (text != NULL, FALSE);
1121   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1122   g_return_val_if_fail (!context->parsing, FALSE);
1123
1124   if (text_len < 0) /* negative length: measure the nul-terminated string */
1125     text_len = strlen (text);
1126
1127   if (text_len == 0)
1128     return TRUE;
1129
1130   context->parsing = TRUE; /* cleared at "finished"; the check above rejects nested calls */
1131
1132
1133   context->current_text = text;
1134   context->current_text_len = text_len;
1135   context->current_text_end = context->current_text + text_len;
1136   context->iter = context->current_text;
1137   context->start = context->iter;
1138
1139   while (context->iter != context->current_text_end)
1140     {
1141       switch (context->state)
1142         {
1143         case STATE_START:
1144           /* Possible next state: AFTER_OPEN_ANGLE */
1145
1146           g_assert (context->tag_stack == NULL);
1147
1148           /* whitespace is ignored outside of any elements */
1149           skip_spaces (context);
1150
1151           if (context->iter != context->current_text_end)
1152             {
1153               if (*context->iter == '<')
1154                 {
1155                   /* Move after the open angle */
1156                   advance_char (context);
1157
1158                   context->state = STATE_AFTER_OPEN_ANGLE;
1159
1160                   /* this could start a passthrough */
1161                   context->start = context->iter;
1162
1163                   /* document is now non-empty */
1164                   context->document_empty = FALSE;
1165                 }
1166               else
1167                 {
1168                   set_error_literal (context,
1169                                      error,
1170                                      G_MARKUP_ERROR_PARSE,
1171                                      _("Document must begin with an element (e.g. <book>)"));
1172                 }
1173             }
1174           break;
1175
1176         case STATE_AFTER_OPEN_ANGLE:
1177           /* Possible next states: INSIDE_OPEN_TAG_NAME,
1178            *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1179            */
1180           if (*context->iter == '?' ||
1181               *context->iter == '!')
1182             {
1183               /* include < in the passthrough */
1184               const gchar *openangle = "<";
1185               add_to_partial (context, openangle, openangle + 1);
1186               context->start = context->iter;
1187               context->balance = 1; /* one '<' open; INSIDE_PASSTHROUGH counts further '<' and '>' */
1188               context->state = STATE_INSIDE_PASSTHROUGH;
1189             }
1190           else if (*context->iter == '/')
1191             {
1192               /* move after it */
1193               advance_char (context);
1194
1195               context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1196             }
1197           else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1198             {
1199               context->state = STATE_INSIDE_OPEN_TAG_NAME;
1200
1201               /* start of tag name */
1202               context->start = context->iter;
1203             }
1204           else
1205             {
1206               gchar buf[8];
1207
1208               set_error (context,
1209                          error,
1210                          G_MARKUP_ERROR_PARSE,
1211                          _("'%s' is not a valid character following "
1212                            "a '<' character; it may not begin an "
1213                            "element name"),
1214                          utf8_str (context->iter, buf));
1215             }
1216           break;
1217
1218           /* The AFTER_CLOSE_ANGLE state is actually sort of
1219            * broken, because it doesn't correspond to a range
1220            * of characters in the input stream as the others do,
1221            * and thus makes things harder to conceptualize
1222            */
1223         case STATE_AFTER_CLOSE_ANGLE:
1224           /* Possible next states: INSIDE_TEXT, STATE_START */
1225           if (context->tag_stack == NULL)
1226             {
1227               context->start = NULL;
1228               context->state = STATE_START;
1229             }
1230           else
1231             {
1232               context->start = context->iter;
1233               context->state = STATE_INSIDE_TEXT;
1234             }
1235           break;
1236
1237         case STATE_AFTER_ELISION_SLASH:
1238           /* Possible next state: AFTER_CLOSE_ANGLE */
1239           if (*context->iter == '>')
1240             {
1241               /* move after the close angle */
1242               advance_char (context);
1243               context->state = STATE_AFTER_CLOSE_ANGLE;
1244               emit_end_element (context, error);
1245             }
1246           else
1247             {
1248               gchar buf[8];
1249
1250               set_error (context,
1251                          error,
1252                          G_MARKUP_ERROR_PARSE,
1253                          _("Odd character '%s', expected a '>' character "
1254                            "to end the empty-element tag '%s'"),
1255                          utf8_str (context->iter, buf),
1256                          current_element (context));
1257             }
1258           break;
1259
1260         case STATE_INSIDE_OPEN_TAG_NAME:
1261           /* Possible next states: BETWEEN_ATTRIBUTES */
1262
1263           /* if there's a partial chunk then it's the first part of the
1264            * tag name. If there's a context->start then it's the start
1265            * of the tag name in current_text, the partial chunk goes
1266            * before that start though.
1267            */
1268           advance_to_name_end (context);
1269
1270           if (context->iter == context->current_text_end)
1271             {
1272               /* The name hasn't necessarily ended. Merge with
1273                * partial chunk, leave state unchanged.
1274                */
1275               add_to_partial (context, context->start, context->iter);
1276             }
1277           else
1278             {
1279               /* The name has ended. Combine it with the partial chunk
1280                * if any; push it on the stack; enter next state.
1281                */
1282               add_to_partial (context, context->start, context->iter);
1283               push_partial_as_tag (context);
1284
1285               context->state = STATE_BETWEEN_ATTRIBUTES;
1286               context->start = NULL;
1287             }
1288           break;
1289
1290         case STATE_INSIDE_ATTRIBUTE_NAME:
1291           /* Possible next states: AFTER_ATTRIBUTE_NAME */
1292
1293           advance_to_name_end (context);
1294           add_to_partial (context, context->start, context->iter);
1295
1296           /* if the name ended inside this chunk, move on: the next
1297            * state adds the attribute to the list (without the value).
1298            * Otherwise stay here and keep the partial chunk for later.
1299            */
1300           if (context->iter != context->current_text_end)
1301             context->state = STATE_AFTER_ATTRIBUTE_NAME;
1302           break;
1303
1304         case STATE_AFTER_ATTRIBUTE_NAME:
1305           /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1306
1307           skip_spaces (context);
1308
1309           if (context->iter != context->current_text_end)
1310             {
1311               /* The name has ended. Validate the accumulated partial
1312                * chunk and hand it over to the attribute list.
1313                */
1314               if (!name_validate (context, context->partial_chunk->str, error))
1315                 break;
1316
1317               add_attribute (context, context->partial_chunk);
1318
1319               context->partial_chunk = NULL;
1320               context->start = NULL;
1321
1322               if (*context->iter == '=')
1323                 {
1324                   advance_char (context);
1325                   context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1326                 }
1327               else
1328                 {
1329                   gchar buf[8];
1330
1331                   set_error (context,
1332                              error,
1333                              G_MARKUP_ERROR_PARSE,
1334                              _("Odd character '%s', expected a '=' after "
1335                                "attribute name '%s' of element '%s'"),
1336                              utf8_str (context->iter, buf),
1337                              current_attribute (context),
1338                              current_element (context));
1339
1340                 }
1341             }
1342           break;
1343
1344         case STATE_BETWEEN_ATTRIBUTES:
1345           /* Possible next states: AFTER_CLOSE_ANGLE,
1346            * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1347            */
1348           skip_spaces (context);
1349
1350           if (context->iter != context->current_text_end)
1351             {
1352               if (*context->iter == '/')
1353                 {
1354                   advance_char (context);
1355                   context->state = STATE_AFTER_ELISION_SLASH;
1356                 }
1357               else if (*context->iter == '>')
1358                 {
1359                   advance_char (context);
1360                   context->state = STATE_AFTER_CLOSE_ANGLE;
1361                 }
1362               else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1363                 {
1364                   context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1365                   /* start of attribute name */
1366                   context->start = context->iter;
1367                 }
1368               else
1369                 {
1370                   gchar buf[8];
1371
1372                   set_error (context,
1373                              error,
1374                              G_MARKUP_ERROR_PARSE,
1375                              _("Odd character '%s', expected a '>' or '/' "
1376                                "character to end the start tag of "
1377                                "element '%s', or optionally an attribute; "
1378                                "perhaps you used an invalid character in "
1379                                "an attribute name"),
1380                              utf8_str (context->iter, buf),
1381                              current_element (context));
1382                 }
1383
1384               /* If we're done with attributes, invoke
1385                * the start_element callback
1386                */
1387               if (context->state == STATE_AFTER_ELISION_SLASH ||
1388                   context->state == STATE_AFTER_CLOSE_ANGLE)
1389                 emit_start_element (context, error);
1390             }
1391           break;
1392
1393         case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1394           /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1395
1396           skip_spaces (context);
1397
1398           if (context->iter != context->current_text_end)
1399             {
1400               if (*context->iter == '"')
1401                 {
1402                   advance_char (context);
1403                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1404                   context->start = context->iter;
1405                 }
1406               else if (*context->iter == '\'')
1407                 {
1408                   advance_char (context);
1409                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1410                   context->start = context->iter;
1411                 }
1412               else
1413                 {
1414                   gchar buf[8];
1415
1416                   set_error (context,
1417                              error,
1418                              G_MARKUP_ERROR_PARSE,
1419                              _("Odd character '%s', expected an open quote mark "
1420                                "after the equals sign when giving value for "
1421                                "attribute '%s' of element '%s'"),
1422                              utf8_str (context->iter, buf),
1423                              current_attribute (context),
1424                              current_element (context));
1425                 }
1426             }
1427           break;
1428
1429         case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1430         case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1431           /* Possible next states: BETWEEN_ATTRIBUTES */
1432           {
1433             gchar delim;
1434
1435             if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1436               {
1437                 delim = '\'';
1438               }
1439             else
1440               {
1441                 delim = '"';
1442               }
1443
1444             do
1445               {
1446                 if (*context->iter == delim)
1447                   break;
1448               }
1449             while (advance_char (context));
1450           }
1451           if (context->iter == context->current_text_end)
1452             {
1453               /* The value hasn't necessarily ended. Merge with
1454                * partial chunk, leave state unchanged.
1455                */
1456               add_to_partial (context, context->start, context->iter);
1457             }
1458           else
1459             {
1460               gboolean is_ascii;
1461               /* The value has ended at the quote mark. Combine it
1462                * with the partial chunk if any; set it for the current
1463                * attribute.
1464                */
1465               add_to_partial (context, context->start, context->iter);
1466
1467               g_assert (context->cur_attr >= 0);
1468
1469               if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1470                   (is_ascii || text_validate (context, context->partial_chunk->str,
1471                                               context->partial_chunk->len, error)))
1472                 {
1473                   /* success, advance past quote and set state. */
1474                   context->attr_values[context->cur_attr] = context->partial_chunk;
1475                   context->partial_chunk = NULL;
1476                   advance_char (context);
1477                   context->state = STATE_BETWEEN_ATTRIBUTES;
1478                   context->start = NULL;
1479                 }
1480
1481               truncate_partial (context);
1482             }
1483           break;
1484
1485         case STATE_INSIDE_TEXT:
1486           /* Possible next states: AFTER_OPEN_ANGLE */
1487           do
1488             {
1489               if (*context->iter == '<')
1490                 break;
1491             }
1492           while (advance_char (context));
1493
1494           /* The text hasn't necessarily ended. Merge with
1495            * partial chunk, leave state unchanged.
1496            */
1497
1498           add_to_partial (context, context->start, context->iter);
1499
1500           if (context->iter != context->current_text_end)
1501             {
1502               gboolean is_ascii;
1503
1504               /* The text has ended at the open angle. Call the text
1505                * callback.
1506                */
1507               if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1508                   (is_ascii || text_validate (context, context->partial_chunk->str,
1509                                               context->partial_chunk->len, error)))
1510                 {
1511                   GError *tmp_error = NULL;
1512
1513                   if (context->parser->text)
1514                     (*context->parser->text) (context,
1515                                               context->partial_chunk->str,
1516                                               context->partial_chunk->len,
1517                                               context->user_data,
1518                                               &tmp_error);
1519
1520                   if (tmp_error == NULL)
1521                     {
1522                       /* advance past open angle and set state. */
1523                       advance_char (context);
1524                       context->state = STATE_AFTER_OPEN_ANGLE;
1525                       /* could begin a passthrough */
1526                       context->start = context->iter;
1527                     }
1528                   else
1529                     propagate_error (context, error, tmp_error);
1530                 }
1531
1532               truncate_partial (context);
1533             }
1534           break;
1535
1536         case STATE_AFTER_CLOSE_TAG_SLASH:
1537           /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1538           if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1539             {
1540               context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1541
1542               /* start of tag name */
1543               context->start = context->iter;
1544             }
1545           else
1546             {
1547               gchar buf[8];
1548
1549               set_error (context,
1550                          error,
1551                          G_MARKUP_ERROR_PARSE,
1552                          _("'%s' is not a valid character following "
1553                            "the characters '</'; '%s' may not begin an "
1554                            "element name"),
1555                          utf8_str (context->iter, buf),
1556                          utf8_str (context->iter, buf));
1557             }
1558           break;
1559
1560         case STATE_INSIDE_CLOSE_TAG_NAME:
1561           /* Possible next state: AFTER_CLOSE_TAG_NAME */
1562           advance_to_name_end (context);
1563           add_to_partial (context, context->start, context->iter);
1564
1565           if (context->iter != context->current_text_end)
1566             context->state = STATE_AFTER_CLOSE_TAG_NAME;
1567           break;
1568
1569         case STATE_AFTER_CLOSE_TAG_NAME:
1570           /* Possible next state: AFTER_CLOSE_ANGLE */
1571
1572           skip_spaces (context);
1573
1574           if (context->iter != context->current_text_end)
1575             {
1576               GString *close_name;
1577
1578               close_name = context->partial_chunk;
1579               context->partial_chunk = NULL;
1580
1581               if (*context->iter != '>')
1582                 {
1583                   gchar buf[8];
1584
1585                   set_error (context,
1586                              error,
1587                              G_MARKUP_ERROR_PARSE,
1588                              _("'%s' is not a valid character following "
1589                                "the close element name '%s'; the allowed "
1590                                "character is '>'"),
1591                              utf8_str (context->iter, buf),
1592                              close_name->str);
1593                 }
1594               else if (context->tag_stack == NULL)
1595                 {
1596                   set_error (context,
1597                              error,
1598                              G_MARKUP_ERROR_PARSE,
1599                              _("Element '%s' was closed, no element "
1600                                "is currently open"),
1601                              close_name->str);
1602                 }
1603               else if (strcmp (close_name->str, current_element (context)) != 0)
1604                 {
1605                   set_error (context,
1606                              error,
1607                              G_MARKUP_ERROR_PARSE,
1608                              _("Element '%s' was closed, but the currently "
1609                                "open element is '%s'"),
1610                              close_name->str,
1611                              current_element (context));
1612                 }
1613               else
1614                 {
1615                   advance_char (context);
1616                   context->state = STATE_AFTER_CLOSE_ANGLE;
1617                   context->start = NULL;
1618
1619                   emit_end_element (context, error);
1620                 }
1621               context->partial_chunk = close_name;
1622               truncate_partial (context);
1623             }
1624           break;
1625
1626         case STATE_INSIDE_PASSTHROUGH:
1627           /* Possible next state: AFTER_CLOSE_ANGLE */
1628           do
1629             {
1630               if (*context->iter == '<')
1631                 context->balance++;
1632               if (*context->iter == '>')
1633                 {
1634                   gchar *str;
1635                   gsize len;
1636
1637                   context->balance--;
1638                   add_to_partial (context, context->start, context->iter);
1639                   context->start = context->iter;
1640
1641                   str = context->partial_chunk->str;
1642                   len = context->partial_chunk->len;
1643
1644                   if (str[1] == '?' && str[len - 1] == '?')
1645                     break; /* processing instruction */
1646                   if (strncmp (str, "<!--", 4) == 0 &&
1647                       strcmp (str + len - 2, "--") == 0)
1648                     break; /* comment */
1649                   if (strncmp (str, "<![CDATA[", 9) == 0 &&
1650                       strcmp (str + len - 2, "]]") == 0)
1651                     break; /* CDATA section */
1652                   if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
1653                       context->balance == 0)
1654                     break; /* DOCTYPE with every nested '<' closed */
1655                 }
1656             }
1657           while (advance_char (context));
1658
1659           if (context->iter == context->current_text_end)
1660             {
1661               /* The passthrough hasn't necessarily ended. Merge with
1662                * partial chunk, leave state unchanged.
1663                */
1664                add_to_partial (context, context->start, context->iter);
1665             }
1666           else
1667             {
1668               /* The passthrough has ended at the close angle. Combine
1669                * it with the partial chunk if any. Call the passthrough
1670                * callback. Note that the open/close angles are
1671                * included in the text of the passthrough.
1672                */
1673               GError *tmp_error = NULL;
1674
1675               advance_char (context); /* advance past close angle */
1676               add_to_partial (context, context->start, context->iter);
1677
1678               if (context->flags & G_MARKUP_TREAT_CDATA_AS_TEXT &&
1679                   strncmp (context->partial_chunk->str, "<![CDATA[", 9) == 0)
1680                 {
1681                   if (context->parser->text &&
1682                       text_validate (context,
1683                                      context->partial_chunk->str + 9,
1684                                      context->partial_chunk->len - 12, /* less "<![CDATA[" and "]]>" */
1685                                      error))
1686                     (*context->parser->text) (context,
1687                                               context->partial_chunk->str + 9,
1688                                               context->partial_chunk->len - 12,
1689                                               context->user_data,
1690                                               &tmp_error);
1691                 }
1692               else if (context->parser->passthrough &&
1693                        text_validate (context,
1694                                       context->partial_chunk->str,
1695                                       context->partial_chunk->len,
1696                                       error))
1697                 (*context->parser->passthrough) (context,
1698                                                  context->partial_chunk->str,
1699                                                  context->partial_chunk->len,
1700                                                  context->user_data,
1701                                                  &tmp_error);
1702
1703               truncate_partial (context);
1704
1705               if (tmp_error == NULL)
1706                 {
1707                   context->state = STATE_AFTER_CLOSE_ANGLE;
1708                   context->start = context->iter; /* could begin text */
1709                 }
1710               else
1711                 propagate_error (context, error, tmp_error);
1712             }
1713           break;
1714
1715         case STATE_ERROR:
1716           goto finished;
1717           break;
1718
1719         default:
1720           g_assert_not_reached ();
1721           break;
1722         }
1723     }
1724
1725  finished:
1726   context->parsing = FALSE;
1727
1728   return context->state != STATE_ERROR;
1729 }
1730
1731 /**
1732  * g_markup_parse_context_end_parse:
1733  * @context: a #GMarkupParseContext
1734  * @error: return location for a #GError
1735  *
1736  * Signals to the #GMarkupParseContext that all data has been
1737  * fed into the parse context with g_markup_parse_context_parse().
1738  *
1739  * This function reports an error if the document isn't complete,
1740  * for example if elements are still open.
1741  *
1742  * Returns: %TRUE on success, %FALSE if an error was set
1743  */
1744 gboolean
1745 g_markup_parse_context_end_parse (GMarkupParseContext  *context,
1746                                   GError              **error)
1747 {
1748   g_return_val_if_fail (context != NULL, FALSE);
1749   g_return_val_if_fail (!context->parsing, FALSE);
1750   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1751
1752   if (context->partial_chunk != NULL)
1753     {
1754       g_string_free (context->partial_chunk, TRUE);
1755       context->partial_chunk = NULL;
1756     }
1757
1758   if (context->document_empty)
1759     {
1760       set_error_literal (context, error, G_MARKUP_ERROR_EMPTY,
1761                          _("Document was empty or contained only whitespace"));
1762       return FALSE;
1763     }
1764
1765   context->parsing = TRUE;
1766
1767   switch (context->state)
1768     {
1769     case STATE_START:
1770       /* Nothing to do */
1771       break;
1772
1773     case STATE_AFTER_OPEN_ANGLE:
1774       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1775                          _("Document ended unexpectedly just after an open angle bracket '<'"));
1776       break;
1777
1778     case STATE_AFTER_CLOSE_ANGLE:
1779       if (context->tag_stack != NULL)
1780         {
1781           /* Error message the same as for INSIDE_TEXT */
1782           set_error (context, error, G_MARKUP_ERROR_PARSE,
1783                      _("Document ended unexpectedly with elements still open - "
1784                        "'%s' was the last element opened"),
1785                      current_element (context));
1786         }
1787       break;
1788
1789     case STATE_AFTER_ELISION_SLASH:
1790       set_error (context, error, G_MARKUP_ERROR_PARSE,
1791                  _("Document ended unexpectedly, expected to see a close angle "
1792                    "bracket ending the tag <%s/>"), current_element (context));
1793       break;
1794
1795     case STATE_INSIDE_OPEN_TAG_NAME:
1796       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1797                          _("Document ended unexpectedly inside an element name"));
1798       break;
1799
1800     case STATE_INSIDE_ATTRIBUTE_NAME:
1801     case STATE_AFTER_ATTRIBUTE_NAME:
1802       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1803                          _("Document ended unexpectedly inside an attribute name"));
1804       break;
1805
1806     case STATE_BETWEEN_ATTRIBUTES:
1807       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1808                          _("Document ended unexpectedly inside an element-opening "
1809                            "tag."));
1810       break;
1811
1812     case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1813       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1814                          _("Document ended unexpectedly after the equals sign "
1815                            "following an attribute name; no attribute value"));
1816       break;
1817
1818     case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1819     case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1820       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1821                          _("Document ended unexpectedly while inside an attribute "
1822                            "value"));
1823       break;
1824
1825     case STATE_INSIDE_TEXT:
1826       g_assert (context->tag_stack != NULL);
1827       set_error (context, error, G_MARKUP_ERROR_PARSE,
1828                  _("Document ended unexpectedly with elements still open - "
1829                    "'%s' was the last element opened"),
1830                  current_element (context));
1831       break;
1832
1833     case STATE_AFTER_CLOSE_TAG_SLASH:
1834     case STATE_INSIDE_CLOSE_TAG_NAME:
1835     case STATE_AFTER_CLOSE_TAG_NAME:
1836       set_error (context, error, G_MARKUP_ERROR_PARSE,
1837                  _("Document ended unexpectedly inside the close tag for "
1838                    "element '%s'"), current_element (context));
1839       break;
1840
1841     case STATE_INSIDE_PASSTHROUGH:
1842       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1843                          _("Document ended unexpectedly inside a comment or "
1844                            "processing instruction"));
1845       break;
1846
1847     case STATE_ERROR:
1848     default:
1849       g_assert_not_reached ();
1850       break;
1851     }
1852
1853   context->parsing = FALSE;
1854
1855   return context->state != STATE_ERROR;
1856 }
1857
1858 /**
1859  * g_markup_parse_context_get_element:
1860  * @context: a #GMarkupParseContext
1861  *
1862  * Retrieves the name of the currently open element.
1863  *
1864  * If called from the start_element or end_element handlers this will
1865  * give the element_name as passed to those functions. For the parent
1866  * elements, see g_markup_parse_context_get_element_stack().
1867  *
1868  * Returns: the name of the currently open element, or %NULL
1869  *
1870  * Since: 2.2
1871  */
1872 const gchar *
1873 g_markup_parse_context_get_element (GMarkupParseContext *context)
1874 {
1875   g_return_val_if_fail (context != NULL, NULL);
1876
1877   if (context->tag_stack == NULL)
1878     return NULL;
1879   else
1880     return current_element (context);
1881 }
1882
1883 /**
1884  * g_markup_parse_context_get_element_stack:
1885  * @context: a #GMarkupParseContext
1886  *
1887  * Retrieves the element stack from the internal state of the parser.
1888  *
1889  * The returned #GSList is a list of strings where the first item is
1890  * the currently open tag (as would be returned by
1891  * g_markup_parse_context_get_element()) and the next item is its
1892  * immediate parent.
1893  *
1894  * This function is intended to be used in the start_element and
1895  * end_element handlers where g_markup_parse_context_get_element()
1896  * would merely return the name of the element that is being
1897  * processed.
1898  *
1899  * Returns: the element stack, which must not be modified
1900  *
1901  * Since: 2.16
1902  */
1903 const GSList *
1904 g_markup_parse_context_get_element_stack (GMarkupParseContext *context)
1905 {
1906   g_return_val_if_fail (context != NULL, NULL);
1907   return context->tag_stack;
1908 }
1909
1910 /**
1911  * g_markup_parse_context_get_position:
1912  * @context: a #GMarkupParseContext
1913  * @line_number: (allow-none): return location for a line number, or %NULL
1914  * @char_number: (allow-none): return location for a char-on-line number, or %NULL
1915  *
1916  * Retrieves the current line number and the number of the character on
1917  * that line. Intended for use in error messages; there are no strict
1918  * semantics for what constitutes the "current" line number other than
1919  * "the best number we could come up with for error messages."
1920  */
1921 void
1922 g_markup_parse_context_get_position (GMarkupParseContext *context,
1923                                      gint                *line_number,
1924                                      gint                *char_number)
1925 {
1926   g_return_if_fail (context != NULL);
1927
1928   if (line_number)
1929     *line_number = context->line_number;
1930
1931   if (char_number)
1932     *char_number = context->char_number;
1933 }
1934
1935 /**
1936  * g_markup_parse_context_get_user_data:
1937  * @context: a #GMarkupParseContext
1938  *
1939  * Returns the user_data associated with @context.
1940  *
1941  * This will either be the user_data that was provided to
1942  * g_markup_parse_context_new() or to the most recent call
1943  * of g_markup_parse_context_push().
1944  *
1945  * Returns: the provided user_data. The returned data belongs to
1946  *     the markup context and will be freed when
1947  *     g_markup_parse_context_free() is called.
1948  *
1949  * Since: 2.18
1950  */
1951 gpointer
1952 g_markup_parse_context_get_user_data (GMarkupParseContext *context)
1953 {
1954   return context->user_data;
1955 }
1956
1957 /**
1958  * g_markup_parse_context_push:
1959  * @context: a #GMarkupParseContext
1960  * @parser: a #GMarkupParser
1961  * @user_data: user data to pass to #GMarkupParser functions
1962  *
1963  * Temporarily redirects markup data to a sub-parser.
1964  *
1965  * This function may only be called from the start_element handler of
1966  * a #GMarkupParser. It must be matched with a corresponding call to
1967  * g_markup_parse_context_pop() in the matching end_element handler
1968  * (except in the case that the parser aborts due to an error).
1969  *
1970  * All tags, text and other data between the matching tags is
1971  * redirected to the subparser given by @parser. @user_data is used
1972  * as the user_data for that parser. @user_data is also passed to the
1973  * error callback in the event that an error occurs. This includes
1974  * errors that occur in subparsers of the subparser.
1975  *
1976  * The end tag matching the start tag for which this call was made is
1977  * handled by the previous parser (which is given its own user_data)
1978  * which is why g_markup_parse_context_pop() is provided to allow "one
1979  * last access" to the @user_data provided to this function. In the
1980  * case of error, the @user_data provided here is passed directly to
1981  * the error callback of the subparser and g_markup_parse_context_pop()
1982  * should not be called. In either case, if @user_data was allocated
1983  * then it ought to be freed from both of these locations.
1984  *
1985  * This function is not intended to be directly called by users
1986  * interested in invoking subparsers. Instead, it is intended to be
1987  * used by the subparsers themselves to implement a higher-level
1988  * interface.
1989  *
1990  * As an example, see the following implementation of a simple
1991  * parser that counts the number of tags encountered.
1992  *
1993  * |[<!-- language="C" -->
1994  * typedef struct
1995  * {
1996  *   gint tag_count;
1997  * } CounterData;
1998  *
1999  * static void
2000  * counter_start_element (GMarkupParseContext  *context,
2001  *                        const gchar          *element_name,
2002  *                        const gchar         **attribute_names,
2003  *                        const gchar         **attribute_values,
2004  *                        gpointer              user_data,
2005  *                        GError              **error)
2006  * {
2007  *   CounterData *data = user_data;
2008  *
2009  *   data->tag_count++;
2010  * }
2011  *
2012  * static void
2013  * counter_error (GMarkupParseContext *context,
2014  *                GError              *error,
2015  *                gpointer             user_data)
2016  * {
2017  *   CounterData *data = user_data;
2018  *
2019  *   g_slice_free (CounterData, data);
2020  * }
2021  *
2022  * static GMarkupParser counter_subparser =
2023  * {
2024  *   counter_start_element,
2025  *   NULL,
2026  *   NULL,
2027  *   NULL,
2028  *   counter_error
2029  * };
2030  * ]|
2031  *
2032  * In order to allow this parser to be easily used as a subparser, the
2033  * following interface is provided:
2034  *
2035  * |[<!-- language="C" -->
2036  * void
2037  * start_counting (GMarkupParseContext *context)
2038  * {
2039  *   CounterData *data = g_slice_new (CounterData);
2040  *
2041  *   data->tag_count = 0;
2042  *   g_markup_parse_context_push (context, &counter_subparser, data);
2043  * }
2044  *
2045  * gint
2046  * end_counting (GMarkupParseContext *context)
2047  * {
2048  *   CounterData *data = g_markup_parse_context_pop (context);
2049  *   int result;
2050  *
2051  *   result = data->tag_count;
2052  *   g_slice_free (CounterData, data);
2053  *
2054  *   return result;
2055  * }
2056  * ]|
2057  *
2058  * The subparser would then be used as follows:
2059  *
2060  * |[<!-- language="C" -->
2061  * static void start_element (context, element_name, ...)
2062  * {
2063  *   if (strcmp (element_name, "count-these") == 0)
2064  *     start_counting (context);
2065  *
2066  *   // else, handle other tags...
2067  * }
2068  *
2069  * static void end_element (context, element_name, ...)
2070  * {
2071  *   if (strcmp (element_name, "count-these") == 0)
2072  *     g_print ("Counted %d tags\n", end_counting (context));
2073  *
2074  *   // else, handle other tags...
2075  * }
2076  * ]|
2077  *
2078  * Since: 2.18
2079  **/
2080 void
2081 g_markup_parse_context_push (GMarkupParseContext *context,
2082                              const GMarkupParser *parser,
2083                              gpointer             user_data)
2084 {
2085   GMarkupRecursionTracker *tracker;
2086
2087   tracker = g_slice_new (GMarkupRecursionTracker);
2088   tracker->prev_element = context->subparser_element;
2089   tracker->prev_parser = context->parser;
2090   tracker->prev_user_data = context->user_data;
2091
2092   context->subparser_element = current_element (context);
2093   context->parser = parser;
2094   context->user_data = user_data;
2095
2096   context->subparser_stack = g_slist_prepend (context->subparser_stack,
2097                                               tracker);
2098 }
2099
2100 /**
2101  * g_markup_parse_context_pop:
2102  * @context: a #GMarkupParseContext
2103  *
2104  * Completes the process of a temporary sub-parser redirection.
2105  *
2106  * This function exists to collect the user_data allocated by a
2107  * matching call to g_markup_parse_context_push(). It must be called
2108  * in the end_element handler corresponding to the start_element
2109  * handler during which g_markup_parse_context_push() was called.
2110  * You must not call this function from the error callback -- the
2111  * @user_data is provided directly to the callback in that case.
2112  *
2113  * This function is not intended to be directly called by users
2114  * interested in invoking subparsers. Instead, it is intended to
2115  * be used by the subparsers themselves to implement a higher-level
2116  * interface.
2117  *
2118  * Returns: the user data passed to g_markup_parse_context_push()
2119  *
2120  * Since: 2.18
2121  */
2122 gpointer
2123 g_markup_parse_context_pop (GMarkupParseContext *context)
2124 {
2125   gpointer user_data;
2126
2127   if (!context->awaiting_pop)
2128     possibly_finish_subparser (context);
2129
2130   g_assert (context->awaiting_pop);
2131
2132   context->awaiting_pop = FALSE;
2133
2134   /* valgrind friendliness */
2135   user_data = context->held_user_data;
2136   context->held_user_data = NULL;
2137
2138   return user_data;
2139 }
2140
2141 static void
2142 append_escaped_text (GString     *str,
2143                      const gchar *text,
2144                      gssize       length)
2145 {
2146   const gchar *p;
2147   const gchar *end;
2148   gunichar c;
2149
2150   p = text;
2151   end = text + length;
2152
2153   while (p < end)
2154     {
2155       const gchar *next;
2156       next = g_utf8_next_char (p);
2157
2158       switch (*p)
2159         {
2160         case '&':
2161           g_string_append (str, "&amp;");
2162           break;
2163
2164         case '<':
2165           g_string_append (str, "&lt;");
2166           break;
2167
2168         case '>':
2169           g_string_append (str, "&gt;");
2170           break;
2171
2172         case '\'':
2173           g_string_append (str, "&apos;");
2174           break;
2175
2176         case '"':
2177           g_string_append (str, "&quot;");
2178           break;
2179
2180         default:
2181           c = g_utf8_get_char (p);
2182           if ((0x1 <= c && c <= 0x8) ||
2183               (0xb <= c && c  <= 0xc) ||
2184               (0xe <= c && c <= 0x1f) ||
2185               (0x7f <= c && c <= 0x84) ||
2186               (0x86 <= c && c <= 0x9f))
2187             g_string_append_printf (str, "&#x%x;", c);
2188           else
2189             g_string_append_len (str, p, next - p);
2190           break;
2191         }
2192
2193       p = next;
2194     }
2195 }
2196
2197 /**
2198  * g_markup_escape_text:
2199  * @text: some valid UTF-8 text
2200  * @length: length of @text in bytes, or -1 if the text is nul-terminated
2201  *
2202  * Escapes text so that the markup parser will parse it verbatim.
2203  * Less than, greater than, ampersand, etc. are replaced with the
2204  * corresponding entities. This function would typically be used
2205  * when writing out a file to be parsed with the markup parser.
2206  *
2207  * Note that this function doesn't protect whitespace and line endings
2208  * from being processed according to the XML rules for normalization
2209  * of line endings and attribute values.
2210  *
2211  * Note also that this function will produce character references in
2212  * the range of &#x1; ... &#x1f; for all control sequences
2213  * except for tabstop, newline and carriage return.  The character
2214  * references in this range are not valid XML 1.0, but they are
2215  * valid XML 1.1 and will be accepted by the GMarkup parser.
2216  *
2217  * Returns: a newly allocated string with the escaped text
2218  */
2219 gchar*
2220 g_markup_escape_text (const gchar *text,
2221                       gssize       length)
2222 {
2223   GString *str;
2224
2225   g_return_val_if_fail (text != NULL, NULL);
2226
2227   if (length < 0)
2228     length = strlen (text);
2229
2230   /* prealloc at least as long as original text */
2231   str = g_string_sized_new (length);
2232   append_escaped_text (str, text, length);
2233
2234   return g_string_free (str, FALSE);
2235 }
2236
/*
 * skip_positional_argument:
 * @cp: position inside a printf-style conversion specification
 *
 * If @cp points at a run of decimal digits immediately followed by
 * '$' (a positional argument such as "2$"), returns the position just
 * after the '$'. Otherwise returns @cp unchanged.
 */
static const char *
skip_positional_argument (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  return (np != cp && *np == '$') ? np + 1 : cp;
}

/*
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *     the returned conversion. On a %NULL return, returns the
 *     pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * A conversion that is cut off by the end of the string (for example
 * a lone "%" or "%-5" at the very end) is not reported as a
 * conversion: %NULL is returned and @after is set to the trailing NUL.
 *
 * Returns: pointer to the next conversion in @format,
 *  or %NULL, if none.
 */
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = skip_positional_argument (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      /* "*" optionally followed by a positional argument.  */
      cp = skip_positional_argument (cp + 1);
    }
  else
    {
      while (*cp >= '0' && *cp <= '9')
        cp++;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Step over the '*' itself before looking for a positional
           * argument; without this the '*' would be mistaken for the
           * conversion character and "%.*s" would be cut off as "%.*".
           */
          cp = skip_positional_argument (cp + 1);
        }
      else
        {
          while (*cp >= '0' && *cp <= '9')
            cp++;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* The string ended before a conversion character was seen: do not
   * step past the trailing NUL, report "no conversion" instead.
   */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Skip the conversion character.  */
  cp++;

  *after = cp;
  return start;
}
2362
2363 /**
2364  * g_markup_vprintf_escaped:
2365  * @format: printf() style format string
2366  * @args: variable argument list, similar to vprintf()
2367  *
2368  * Formats the data in @args according to @format, escaping
2369  * all string and character arguments in the fashion
2370  * of g_markup_escape_text(). See g_markup_printf_escaped().
2371  *
2372  * Returns: newly allocated result from formatting
2373  *  operation. Free with g_free().
2374  *
2375  * Since: 2.4
2376  */
2377 #pragma GCC diagnostic push
2378 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
2379
2380 gchar *
2381 g_markup_vprintf_escaped (const gchar *format,
2382                           va_list      args)
2383 {
2384   GString *format1;
2385   GString *format2;
2386   GString *result = NULL;
2387   gchar *output1 = NULL;
2388   gchar *output2 = NULL;
2389   const char *p, *op1, *op2;
2390   va_list args2;
2391
2392   /* The technique here, is that we make two format strings that
2393    * have the identical conversions in the identical order to the
2394    * original strings, but differ in the text in-between. We
2395    * then use the normal g_strdup_vprintf() to format the arguments
2396    * with the two new format strings. By comparing the results,
2397    * we can figure out what segments of the output come from
2398    * the original format string, and what from the arguments,
2399    * and thus know what portions of the string to escape.
2400    *
2401    * For instance, for:
2402    *
2403    *  g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2404    *
2405    * We form the two format strings "%sX%dX" and %sY%sY". The results
2406    * of formatting with those two strings are
2407    *
2408    * "%sX%dX" => "Susan & FredX5X"
2409    * "%sY%dY" => "Susan & FredY5Y"
2410    *
2411    * To find the span of the first argument, we find the first position
2412    * where the two arguments differ, which tells us that the first
2413    * argument formatted to "Susan & Fred". We then escape that
2414    * to "Susan & Fred" and join up with the intermediate portions
2415    * of the format string and the second argument to get
2416    * "Susan & Fred ate 5 apples".
2417    */
2418
2419   /* Create the two modified format strings
2420    */
2421   format1 = g_string_new (NULL);
2422   format2 = g_string_new (NULL);
2423   p = format;
2424   while (TRUE)
2425     {
2426       const char *after;
2427       const char *conv = find_conversion (p, &after);
2428       if (!conv)
2429         break;
2430
2431       g_string_append_len (format1, conv, after - conv);
2432       g_string_append_c (format1, 'X');
2433       g_string_append_len (format2, conv, after - conv);
2434       g_string_append_c (format2, 'Y');
2435
2436       p = after;
2437     }
2438
2439   /* Use them to format the arguments
2440    */
2441   G_VA_COPY (args2, args);
2442
2443   output1 = g_strdup_vprintf (format1->str, args);
2444
2445   if (!output1)
2446     {
2447       va_end (args2);
2448       goto cleanup;
2449     }
2450
2451   output2 = g_strdup_vprintf (format2->str, args2);
2452   va_end (args2);
2453   if (!output2)
2454     goto cleanup;
2455   result = g_string_new (NULL);
2456
2457   /* Iterate through the original format string again,
2458    * copying the non-conversion portions and the escaped
2459    * converted arguments to the output string.
2460    */
2461   op1 = output1;
2462   op2 = output2;
2463   p = format;
2464   while (TRUE)
2465     {
2466       const char *after;
2467       const char *output_start;
2468       const char *conv = find_conversion (p, &after);
2469       char *escaped;
2470
2471       if (!conv)        /* The end, after points to the trailing \0 */
2472         {
2473           g_string_append_len (result, p, after - p);
2474           break;
2475         }
2476
2477       g_string_append_len (result, p, conv - p);
2478       output_start = op1;
2479       while (*op1 == *op2)
2480         {
2481           op1++;
2482           op2++;
2483         }
2484
2485       escaped = g_markup_escape_text (output_start, op1 - output_start);
2486       g_string_append (result, escaped);
2487       g_free (escaped);
2488
2489       p = after;
2490       op1++;
2491       op2++;
2492     }
2493
2494  cleanup:
2495   g_string_free (format1, TRUE);
2496   g_string_free (format2, TRUE);
2497   g_free (output1);
2498   g_free (output2);
2499
2500   if (result)
2501     return g_string_free (result, FALSE);
2502   else
2503     return NULL;
2504 }
2505
2506 #pragma GCC diagnostic pop
2507
2508 /**
2509  * g_markup_printf_escaped:
2510  * @format: printf() style format string
2511  * @...: the arguments to insert in the format string
2512  *
2513  * Formats arguments according to @format, escaping
2514  * all string and character arguments in the fashion
2515  * of g_markup_escape_text(). This is useful when you
2516  * want to insert literal strings into XML-style markup
2517  * output, without having to worry that the strings
2518  * might themselves contain markup.
2519  *
2520  * |[<!-- language="C" -->
2521  * const char *store = "Fortnum & Mason";
2522  * const char *item = "Tea";
2523  * char *output;
2524  *
2525  * output = g_markup_printf_escaped ("<purchase>"
2526  *                                   "<store>%s</store>"
2527  *                                   "<item>%s</item>"
2528  *                                   "</purchase>",
2529  *                                   store, item);
2530  * ]|
2531  *
2532  * Returns: newly allocated result from formatting
2533  *    operation. Free with g_free().
2534  *
2535  * Since: 2.4
2536  */
2537 gchar *
2538 g_markup_printf_escaped (const gchar *format, ...)
2539 {
2540   char *result;
2541   va_list args;
2542
2543   va_start (args, format);
2544   result = g_markup_vprintf_escaped (format, args);
2545   va_end (args);
2546
2547   return result;
2548 }
2549
2550 static gboolean
2551 g_markup_parse_boolean (const char  *string,
2552                         gboolean    *value)
2553 {
2554   char const * const falses[] = { "false", "f", "no", "n", "0" };
2555   char const * const trues[] = { "true", "t", "yes", "y", "1" };
2556   int i;
2557
2558   for (i = 0; i < G_N_ELEMENTS (falses); i++)
2559     {
2560       if (g_ascii_strcasecmp (string, falses[i]) == 0)
2561         {
2562           if (value != NULL)
2563             *value = FALSE;
2564
2565           return TRUE;
2566         }
2567     }
2568
2569   for (i = 0; i < G_N_ELEMENTS (trues); i++)
2570     {
2571       if (g_ascii_strcasecmp (string, trues[i]) == 0)
2572         {
2573           if (value != NULL)
2574             *value = TRUE;
2575
2576           return TRUE;
2577         }
2578     }
2579
2580   return FALSE;
2581 }
2582
2583 /**
2584  * GMarkupCollectType:
2585  * @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes
2586  *     to collect
2587  * @G_MARKUP_COLLECT_STRING: collect the string pointer directly from
2588  *     the attribute_values[] array. Expects a parameter of type (const
2589  *     char **). If %G_MARKUP_COLLECT_OPTIONAL is specified and the
2590  *     attribute isn't present then the pointer will be set to %NULL
2591  * @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but
2592  *     expects a parameter of type (char **) and g_strdup()s the
2593  *     returned pointer. The pointer must be freed with g_free()
2594  * @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (gboolean *)
2595  *     and parses the attribute value as a boolean. Sets %FALSE if the
2596  *     attribute isn't present. Valid boolean values consist of
2597  *     (case-insensitive) "false", "f", "no", "n", "0" and "true", "t",
2598  *     "yes", "y", "1"
2599  * @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but
2600  *     in the case of a missing attribute a value is set that compares
 *     equal to neither %FALSE nor %TRUE. %G_MARKUP_COLLECT_OPTIONAL is
2602  *     implied
2603  * @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other fields.
2604  *     If present, allows the attribute not to appear. A default value
2605  *     is set depending on what value type is used
2606  *
2607  * A mixed enumerated type and flags field. You must specify one type
 * (string, strdup, boolean, tristate).  Additionally, you may optionally
2609  * bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL.
2610  *
2611  * It is likely that this enum will be extended in the future to
2612  * support other types.
2613  */
2614
2615 /**
2616  * g_markup_collect_attributes:
2617  * @element_name: the current tag name
2618  * @attribute_names: the attribute names
2619  * @attribute_values: the attribute values
2620  * @error: a pointer to a #GError or %NULL
2621  * @first_type: the #GMarkupCollectType of the first attribute
2622  * @first_attr: the name of the first attribute
2623  * @...: a pointer to the storage location of the first attribute
2624  *     (or %NULL), followed by more types names and pointers, ending
2625  *     with %G_MARKUP_COLLECT_INVALID
2626  *
2627  * Collects the attributes of the element from the data passed to the
2628  * #GMarkupParser start_element function, dealing with common error
2629  * conditions and supporting boolean values.
2630  *
2631  * This utility function is not required to write a parser but can save
2632  * a lot of typing.
2633  *
2634  * The @element_name, @attribute_names, @attribute_values and @error
2635  * parameters passed to the start_element callback should be passed
2636  * unmodified to this function.
2637  *
2638  * Following these arguments is a list of "supported" attributes to collect.
2639  * It is an error to specify multiple attributes with the same name. If any
2640  * attribute not in the list appears in the @attribute_names array then an
2641  * unknown attribute error will result.
2642  *
2643  * The #GMarkupCollectType field allows specifying the type of collection
2644  * to perform and if a given attribute must appear or is optional.
2645  *
2646  * The attribute name is simply the name of the attribute to collect.
2647  *
2648  * The pointer should be of the appropriate type (see the descriptions
2649  * under #GMarkupCollectType) and may be %NULL in case a particular
2650  * attribute is to be allowed but ignored.
2651  *
2652  * This function deals with issuing errors for missing attributes
2653  * (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes
2654  * (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate
2655  * attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well
2656  * as parse errors for boolean-valued attributes (again of type
2657  * %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
2658  * will be returned and @error will be set as appropriate.
2659  *
2660  * Returns: %TRUE if successful
2661  *
2662  * Since: 2.16
2663  **/
2664 gboolean
2665 g_markup_collect_attributes (const gchar         *element_name,
2666                              const gchar        **attribute_names,
2667                              const gchar        **attribute_values,
2668                              GError             **error,
2669                              GMarkupCollectType   first_type,
2670                              const gchar         *first_attr,
2671                              ...)
2672 {
2673   GMarkupCollectType type;
2674   const gchar *attr;
2675   guint64 collected;
2676   int written;
2677   va_list ap;
2678   int i;
2679
2680   type = first_type;
2681   attr = first_attr;
2682   collected = 0;
2683   written = 0;
2684
2685   va_start (ap, first_attr);
2686   while (type != G_MARKUP_COLLECT_INVALID)
2687     {
2688       gboolean mandatory;
2689       const gchar *value;
2690
2691       mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
2692       type &= (G_MARKUP_COLLECT_OPTIONAL - 1);
2693
2694       /* tristate records a value != TRUE and != FALSE
2695        * for the case where the attribute is missing
2696        */
2697       if (type == G_MARKUP_COLLECT_TRISTATE)
2698         mandatory = FALSE;
2699
2700       for (i = 0; attribute_names[i]; i++)
2701         if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i)))
2702           if (!strcmp (attribute_names[i], attr))
2703             break;
2704
2705       /* ISO C99 only promises that the user can pass up to 127 arguments.
2706        * Subtracting the first 4 arguments plus the final NULL and dividing
2707        * by 3 arguments per collected attribute, we are left with a maximum
2708        * number of supported attributes of (127 - 5) / 3 = 40.
2709        *
2710        * In reality, nobody is ever going to call us with anywhere close to
2711        * 40 attributes to collect, so it is safe to assume that if i > 40
2712        * then the user has given some invalid or repeated arguments.  These
2713        * problems will be caught and reported at the end of the function.
2714        *
2715        * We know at this point that we have an error, but we don't know
2716        * what error it is, so just continue...
2717        */
2718       if (i < 40)
2719         collected |= (G_GUINT64_CONSTANT(1) << i);
2720
2721       value = attribute_values[i];
2722
2723       if (value == NULL && mandatory)
2724         {
2725           g_set_error (error, G_MARKUP_ERROR,
2726                        G_MARKUP_ERROR_MISSING_ATTRIBUTE,
2727                        "element '%s' requires attribute '%s'",
2728                        element_name, attr);
2729
2730           va_end (ap);
2731           goto failure;
2732         }
2733
2734       switch (type)
2735         {
2736         case G_MARKUP_COLLECT_STRING:
2737           {
2738             const char **str_ptr;
2739
2740             str_ptr = va_arg (ap, const char **);
2741
2742             if (str_ptr != NULL)
2743               *str_ptr = value;
2744           }
2745           break;
2746
2747         case G_MARKUP_COLLECT_STRDUP:
2748           {
2749             char **str_ptr;
2750
2751             str_ptr = va_arg (ap, char **);
2752
2753             if (str_ptr != NULL)
2754               *str_ptr = g_strdup (value);
2755           }
2756           break;
2757
2758         case G_MARKUP_COLLECT_BOOLEAN:
2759         case G_MARKUP_COLLECT_TRISTATE:
2760           if (value == NULL)
2761             {
2762               gboolean *bool_ptr;
2763
2764               bool_ptr = va_arg (ap, gboolean *);
2765
2766               if (bool_ptr != NULL)
2767                 {
2768                   if (type == G_MARKUP_COLLECT_TRISTATE)
2769                     /* constructivists rejoice!
2770                      * neither false nor true...
2771                      */
2772                     *bool_ptr = -1;
2773
2774                   else /* G_MARKUP_COLLECT_BOOLEAN */
2775                     *bool_ptr = FALSE;
2776                 }
2777             }
2778           else
2779             {
2780               if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
2781                 {
2782                   g_set_error (error, G_MARKUP_ERROR,
2783                                G_MARKUP_ERROR_INVALID_CONTENT,
2784                                "element '%s', attribute '%s', value '%s' "
2785                                "cannot be parsed as a boolean value",
2786                                element_name, attr, value);
2787
2788                   va_end (ap);
2789                   goto failure;
2790                 }
2791             }
2792
2793           break;
2794
2795         default:
2796           g_assert_not_reached ();
2797         }
2798
2799       type = va_arg (ap, GMarkupCollectType);
2800       attr = va_arg (ap, const char *);
2801       written++;
2802     }
2803   va_end (ap);
2804
2805   /* ensure we collected all the arguments */
2806   for (i = 0; attribute_names[i]; i++)
2807     if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
2808       {
2809         /* attribute not collected:  could be caused by two things.
2810          *
2811          * 1) it doesn't exist in our list of attributes
2812          * 2) it existed but was matched by a duplicate attribute earlier
2813          *
2814          * find out.
2815          */
2816         int j;
2817
2818         for (j = 0; j < i; j++)
2819           if (strcmp (attribute_names[i], attribute_names[j]) == 0)
2820             /* duplicate! */
2821             break;
2822
2823         /* j is now the first occurrence of attribute_names[i] */
2824         if (i == j)
2825           g_set_error (error, G_MARKUP_ERROR,
2826                        G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
2827                        "attribute '%s' invalid for element '%s'",
2828                        attribute_names[i], element_name);
2829         else
2830           g_set_error (error, G_MARKUP_ERROR,
2831                        G_MARKUP_ERROR_INVALID_CONTENT,
2832                        "attribute '%s' given multiple times for element '%s'",
2833                        attribute_names[i], element_name);
2834
2835         goto failure;
2836       }
2837
2838   return TRUE;
2839
2840 failure:
2841   /* replay the above to free allocations */
2842   type = first_type;
2843   attr = first_attr;
2844
2845   va_start (ap, first_attr);
2846   while (type != G_MARKUP_COLLECT_INVALID)
2847     {
2848       gpointer ptr;
2849
2850       ptr = va_arg (ap, gpointer);
2851
2852       if (ptr != NULL)
2853         {
2854           switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
2855             {
2856             case G_MARKUP_COLLECT_STRDUP:
2857               if (written)
2858                 g_free (*(char **) ptr);
2859
2860             case G_MARKUP_COLLECT_STRING:
2861               *(char **) ptr = NULL;
2862               break;
2863
2864             case G_MARKUP_COLLECT_BOOLEAN:
2865               *(gboolean *) ptr = FALSE;
2866               break;
2867
2868             case G_MARKUP_COLLECT_TRISTATE:
2869               *(gboolean *) ptr = -1;
2870               break;
2871             }
2872         }
2873
2874       type = va_arg (ap, GMarkupCollectType);
2875       attr = va_arg (ap, const char *);
2876     }
2877   va_end (ap);
2878
2879   return FALSE;
2880 }