glib/gmarkup.c

   1 /* gmarkup.c - Simple XML-like parser
   2  *
   3  *  Copyright 2000, 2003 Red Hat, Inc.
   4  *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public License
  17  * along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "config.h"
  21
  22 #include <stdarg.h>
  23 #include <string.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <errno.h>
  27
  28 #include "gmarkup.h"
  29
  30 #include "gatomic.h"
  31 #include "gslice.h"
  32 #include "galloca.h"
  33 #include "gstrfuncs.h"
  34 #include "gstring.h"
  35 #include "gtestutils.h"
  36 #include "glibintl.h"
  37 #include "gthread.h"
  38
  39 /**
  40  * SECTION:markup
  41  * @Title: Simple XML Subset Parser
  42  * @Short_description: parses a subset of XML
  43  * @See_also: [XML Specification](http://www.w3.org/TR/REC-xml/)
  44  *
  45  * The "GMarkup" parser is intended to parse a simple markup format
  46  * that's a subset of XML. This is a small, efficient, easy-to-use
  47  * parser. It should not be used if you expect to interoperate with
  48  * other applications generating full-scale XML. However, it's very
  49  * useful for application data files, config files, etc. where you
  50  * know your application will be the only one writing the file.
  51  * Full-scale XML parsers should be able to parse the subset used by
  52  * GMarkup, so you can easily migrate to full-scale XML at a later
  53  * time if the need arises.
  54  *
  55  * GMarkup is not guaranteed to signal an error on all invalid XML;
  56  * the parser may accept documents that an XML parser would not.
  57  * However, XML documents which are not well-formed (which is a
  58  * weaker condition than being valid. See the
  59  * [XML specification](http://www.w3.org/TR/REC-xml/)
  60  * for definitions of these terms.) are not considered valid GMarkup
  61  * documents.
  62  *
  63  * Simplifications to XML include:
  64  *
  65  * - Only UTF-8 encoding is allowed
  66  *
  67  * - No user-defined entities
  68  *
  69  * - Processing instructions, comments and the doctype declaration
  70  *   are "passed through" but are not interpreted in any way
  71  *
  72  * - No DTD or validation
  73  *
  74  * The markup format does support:
  75  *
  76  * - Elements
  77  *
  78  * - Attributes
  79  *
  80  * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
  81  *
  82  * - Character references
  83  *
  84  * - Sections marked as CDATA
  85  */
  86
  87 G_DEFINE_QUARK (g-markup-error-quark, g_markup_error) /* GLib helper macro; presumably defines the g_markup_error_quark() accessor behind the G_MARKUP_ERROR domain used by the error helpers in this file -- confirm against the GLib quark documentation */
  88
  89 typedef enum /* where the parser currently is within the markup being read */
  90 {
  91   STATE_START, /* state a context is given by g_markup_parse_context_new() */
  92   STATE_AFTER_OPEN_ANGLE,
  93   STATE_AFTER_CLOSE_ANGLE,
  94   STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  95   STATE_INSIDE_OPEN_TAG_NAME,
  96   STATE_INSIDE_ATTRIBUTE_NAME,
  97   STATE_AFTER_ATTRIBUTE_NAME,
  98   STATE_BETWEEN_ATTRIBUTES,
  99   STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
 100   STATE_INSIDE_ATTRIBUTE_VALUE_SQ, /* in this state and the _DQ one, unescape_gstring_inplace() normalizes whitespace */
 101   STATE_INSIDE_ATTRIBUTE_VALUE_DQ, /* presumably SQ/DQ mean single-/double-quoted value -- confirm in the state machine */
 102   STATE_INSIDE_TEXT,
 103   STATE_AFTER_CLOSE_TAG_SLASH,
 104   STATE_INSIDE_CLOSE_TAG_NAME,
 105   STATE_AFTER_CLOSE_TAG_NAME,
 106   STATE_INSIDE_PASSTHROUGH,
 107   STATE_ERROR /* set by mark_error() whenever an error is reported */
 108 } GMarkupParseState;
 109
 110 typedef struct /* one entry of context->subparser_stack: the values pop_subparser_stack() puts back */
 111 {
 112   const char *prev_element; /* restored into context->subparser_element */
 113   const GMarkupParser *prev_parser; /* restored into context->parser */
 114   gpointer prev_user_data; /* restored into context->user_data */
 115 } GMarkupRecursionTracker;
 116
 117 struct _GMarkupParseContext /* complete state of one parse; allocated by g_markup_parse_context_new() */
 118 {
 119   const GMarkupParser *parser; /* callbacks currently in effect (mark_error() calls parser->error) */
 120
 121   volatile gint ref_count; /* starts at 1; adjusted with g_atomic_int_inc / g_atomic_int_dec_and_test */
 122
 123   GMarkupParseFlags flags; /* consulted for G_MARKUP_PREFIX_ERROR_POSITION and G_MARKUP_IGNORE_QUALIFIED */
 124
 125   gint line_number; /* starts at 1; incremented by advance_char() on '\n'; used in error messages */
 126   gint char_number; /* starts at 1; reset to 1 by advance_char() on '\n'; used in error messages */
 127
 128   GMarkupParseState state;
 129
 130   gpointer user_data; /* handed to parser callbacks and, on free, to dnotify */
 131   GDestroyNotify dnotify; /* may be NULL; g_markup_parse_context_free() checks before calling */
 132
 133   /* A piece of character data or an element that
 134    * hasn't "ended" yet so we haven't yet called
 135    * the callback for it.
 136    */
 137   GString *partial_chunk;
 138   GSList *spare_chunks; /* blanked GStrings kept for reuse by release_chunk() / add_to_partial() */
 139
 140   GSList *tag_stack; /* element names (the ->str of the matching tag_stack_gstr entry), innermost first */
 141   GSList *tag_stack_gstr; /* GStrings owning those names; pushed and popped in step with tag_stack */
 142   GSList *spare_list_nodes; /* unused GSList nodes recycled by get_list_node() / free_list_node() */
 143
 144   GString **attr_names; /* NULL-terminated by add_attribute(); grown in steps of 5 entries */
 145   GString **attr_values; /* parallel to attr_names */
 146   gint cur_attr; /* index of the most recently added attribute, -1 when there is none */
 147   gint alloc_attrs; /* number of slots allocated in attr_names / attr_values */
 148
 149   const gchar *current_text;
 150   gssize       current_text_len; /* initialised to -1 by g_markup_parse_context_new() */
 151   const gchar *current_text_end; /* advance_char() returns FALSE when iter reaches this */
 152
 153   /* used to save the start of the last interesting thingy */
 154   const gchar *start;
 155
 156   const gchar *iter; /* read position, moved by advance_char() */
 157
 158   guint document_empty : 1; /* TRUE in a freshly created context */
 159   guint parsing : 1; /* g_markup_parse_context_free() refuses to run while this is set */
 160   guint awaiting_pop : 1; /* set by pop_subparser_stack(), cleared by ensure_no_outstanding_subparser() */
 161   gint balance; /* initialised to 0; its users are outside this part of the file */
 162
 163   /* subparser support */
 164   GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
 165   const char *subparser_element; /* compared by pointer with current_element() to detect the end of a subparser */
 166   gpointer held_user_data; /* user_data of the subparser that was just popped; only meaningful while awaiting_pop */
 167 };
 168
 169 /*
 170  * Helpers to reduce our allocation overhead, we have
 171  * a well defined allocation lifecycle.
 172  */
 173 static GSList *
 174 get_list_node (GMarkupParseContext *context, gpointer data)
 175 {
 176   GSList *node;
 177   if (context->spare_list_nodes != NULL)
 178     {
 179       node = context->spare_list_nodes;
 180       context->spare_list_nodes = g_slist_remove_link (context->spare_list_nodes, node);
 181     }
 182   else
 183     node = g_slist_alloc();
 184   node->data = data;
 185   return node;
 186 }
 187
 188 static void
 189 free_list_node (GMarkupParseContext *context, GSList *node)
 190 {
 191   node->data = NULL;
 192   context->spare_list_nodes = g_slist_concat (node, context->spare_list_nodes);
 193 }
 194
 195 static inline void
 196 string_blank (GString *string)
 197 {
 198   string->str[0] = '\0';
 199   string->len = 0;
 200 }
 201
 202 /**
 203  * g_markup_parse_context_new:
 204  * @parser: a #GMarkupParser
 205  * @flags: one or more #GMarkupParseFlags
 206  * @user_data: user data to pass to #GMarkupParser functions
 207  * @user_data_dnotify: user data destroy notifier called when
 208  *     the parse context is freed
 209  *
 210  * Creates a new parse context. A parse context is used to parse
 211  * marked-up documents. You can feed any number of documents into
 212  * a context, as long as no errors occur; once an error occurs,
 213  * the parse context can't continue to parse text (you have to
 214  * free it and create a new parse context).
 215  *
 216  * Returns: a new #GMarkupParseContext
 217  **/
 218 GMarkupParseContext *
 219 g_markup_parse_context_new (const GMarkupParser *parser,
 220                             GMarkupParseFlags    flags,
 221                             gpointer             user_data,
 222                             GDestroyNotify       user_data_dnotify)
 223 {
 224   GMarkupParseContext *context;
 225
 226   g_return_val_if_fail (parser != NULL, NULL);
 227
 228   context = g_new (GMarkupParseContext, 1);
 229
 230   context->ref_count = 1;
 231   context->parser = parser;
 232   context->flags = flags;
 233   context->user_data = user_data;
 234   context->dnotify = user_data_dnotify;
 235
 236   context->line_number = 1;
 237   context->char_number = 1;
 238
 239   context->partial_chunk = NULL;
 240   context->spare_chunks = NULL;
 241   context->spare_list_nodes = NULL;
 242
 243   context->state = STATE_START;
 244   context->tag_stack = NULL;
 245   context->tag_stack_gstr = NULL;
 246   context->attr_names = NULL;
 247   context->attr_values = NULL;
 248   context->cur_attr = -1;
 249   context->alloc_attrs = 0;
 250
 251   context->current_text = NULL;
 252   context->current_text_len = -1;
 253   context->current_text_end = NULL;
 254
 255   context->start = NULL;
 256   context->iter = NULL;
 257
 258   context->document_empty = TRUE;
 259   context->parsing = FALSE;
 260
 261   context->awaiting_pop = FALSE;
 262   context->subparser_stack = NULL;
 263   context->subparser_element = NULL;
 264
 265   /* this is only looked at if awaiting_pop = TRUE.  initialise anyway. */
 266   context->held_user_data = NULL;
 267
 268   context->balance = 0;
 269
 270   return context;
 271 }
 272
 273 /**
 274  * g_markup_parse_context_ref:
 275  * @context: a #GMarkupParseContext
 276  *
 277  * Increases the reference count of @context.
 278  *
 279  * Returns: the same @context
 280  *
 281  * Since: 2.36
 282  **/
 283 GMarkupParseContext *
 284 g_markup_parse_context_ref (GMarkupParseContext *context)
 285 {
 286   g_return_val_if_fail (context != NULL, NULL);
 287   g_return_val_if_fail (context->ref_count > 0, NULL);
 288
 289   g_atomic_int_inc (&context->ref_count);
 290
 291   return context;
 292 }
 293
 294 /**
 295  * g_markup_parse_context_unref:
 296  * @context: a #GMarkupParseContext
 297  *
 298  * Decreases the reference count of @context.  When its reference count
 299  * drops to 0, it is freed.
 300  *
 301  * Since: 2.36
 302  **/
 303 void
 304 g_markup_parse_context_unref (GMarkupParseContext *context)
 305 {
 306   g_return_if_fail (context != NULL);
 307   g_return_if_fail (context->ref_count > 0);
 308
 309   if (g_atomic_int_dec_and_test (&context->ref_count))
 310     g_markup_parse_context_free (context);
 311 }
 312
 313 static void
 314 string_full_free (gpointer ptr)
 315 {
 316   g_string_free (ptr, TRUE);
 317 }
 318
 319 static void clear_attributes (GMarkupParseContext *context);
 320
 321 /**
 322  * g_markup_parse_context_free:
 323  * @context: a #GMarkupParseContext
 324  *
 325  * Frees a #GMarkupParseContext.
 326  *
 327  * This function can't be called from inside one of the
 328  * #GMarkupParser functions or while a subparser is pushed.
 329  */
 330 void
 331 g_markup_parse_context_free (GMarkupParseContext *context)
 332 {
 333   g_return_if_fail (context != NULL);
 334   g_return_if_fail (!context->parsing);
 335   g_return_if_fail (!context->subparser_stack);
 336   g_return_if_fail (!context->awaiting_pop);
 337
 338   if (context->dnotify)
 339     (* context->dnotify) (context->user_data);
 340
 341   clear_attributes (context);
 342   g_free (context->attr_names);
 343   g_free (context->attr_values);
 344
 345   g_slist_free_full (context->tag_stack_gstr, string_full_free);
 346   g_slist_free (context->tag_stack);
 347
 348   g_slist_free_full (context->spare_chunks, string_full_free);
 349   g_slist_free (context->spare_list_nodes);
 350
 351   if (context->partial_chunk)
 352     g_string_free (context->partial_chunk, TRUE);
 353
 354   g_free (context);
 355 }
 356
 357 static void pop_subparser_stack (GMarkupParseContext *context);
 358
 359 static void
 360 mark_error (GMarkupParseContext *context,
 361             GError              *error)
 362 {
 363   context->state = STATE_ERROR;
 364
 365   if (context->parser->error)
 366     (*context->parser->error) (context, error, context->user_data);
 367
 368   /* report the error all the way up to free all the user-data */
 369   while (context->subparser_stack)
 370     {
 371       pop_subparser_stack (context);
 372       context->awaiting_pop = FALSE; /* already been freed */
 373
 374       if (context->parser->error)
 375         (*context->parser->error) (context, error, context->user_data);
 376     }
 377 }
 378
 379 static void
 380 set_error (GMarkupParseContext  *context,
 381            GError              **error,
 382            GMarkupError          code,
 383            const gchar          *format,
 384            ...) G_GNUC_PRINTF (4, 5);
 385
 386 static void
 387 set_error_literal (GMarkupParseContext  *context,
 388                    GError              **error,
 389                    GMarkupError          code,
 390                    const gchar          *message)
 391 {
 392   GError *tmp_error;
 393
 394   tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, message);
 395
 396   g_prefix_error (&tmp_error,
 397                   _("Error on line %d char %d: "),
 398                   context->line_number,
 399                   context->char_number);
 400
 401   mark_error (context, tmp_error);
 402
 403   g_propagate_error (error, tmp_error);
 404 }
 405
 406 G_GNUC_PRINTF(4, 5)
 407 static void
 408 set_error (GMarkupParseContext  *context,
 409            GError              **error,
 410            GMarkupError          code,
 411            const gchar          *format,
 412            ...)
 413 {
 414   gchar *s;
 415   gchar *s_valid;
 416   va_list args;
 417
 418   va_start (args, format);
 419   s = g_strdup_vprintf (format, args);
 420   va_end (args);
 421
 422   /* Make sure that the GError message is valid UTF-8
 423    * even if it is complaining about invalid UTF-8 in the markup
 424    */
 425   s_valid = g_utf8_make_valid (s, -1);
 426   set_error_literal (context, error, code, s);
 427
 428   g_free (s);
 429   g_free (s_valid);
 430 }
 431
 432 static void
 433 propagate_error (GMarkupParseContext  *context,
 434                  GError              **dest,
 435                  GError               *src)
 436 {
 437   if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
 438     g_prefix_error (&src,
 439                     _("Error on line %d char %d: "),
 440                     context->line_number,
 441                     context->char_number);
 442
 443   mark_error (context, src);
 444
 445   g_propagate_error (dest, src);
 446 }
 447
 448 #define IS_COMMON_NAME_END_CHAR(c) \
 449   ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
 450
 451 static gboolean
 452 slow_name_validate (GMarkupParseContext  *context,
 453                     const gchar          *name,
 454                     GError              **error)
 455 {
 456   const gchar *p = name;
 457
 458   if (!g_utf8_validate (name, strlen (name), NULL))
 459     {
 460       set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
 461                  _("Invalid UTF-8 encoded text in name - not valid '%s'"), name);
 462       return FALSE;
 463     }
 464
 465   if (!(g_ascii_isalpha (*p) ||
 466         (!IS_COMMON_NAME_END_CHAR (*p) &&
 467          (*p == '_' ||
 468           *p == ':' ||
 469           g_unichar_isalpha (g_utf8_get_char (p))))))
 470     {
 471       set_error (context, error, G_MARKUP_ERROR_PARSE,
 472                  _("'%s' is not a valid name"), name);
 473       return FALSE;
 474     }
 475
 476   for (p = g_utf8_next_char (name); *p != '\0'; p = g_utf8_next_char (p))
 477     {
 478       /* is_name_char */
 479       if (!(g_ascii_isalnum (*p) ||
 480             (!IS_COMMON_NAME_END_CHAR (*p) &&
 481              (*p == '.' ||
 482               *p == '-' ||
 483               *p == '_' ||
 484               *p == ':' ||
 485               g_unichar_isalpha (g_utf8_get_char (p))))))
 486         {
 487           set_error (context, error, G_MARKUP_ERROR_PARSE,
 488                      _("'%s' is not a valid name: '%c'"), name, *p);
 489           return FALSE;
 490         }
 491     }
 492   return TRUE;
 493 }
 494
 495 /*
 496  * Use me for elements, attributes etc.
 497  */
 498 static gboolean
 499 name_validate (GMarkupParseContext  *context,
 500                const gchar          *name,
 501                GError              **error)
 502 {
 503   char mask;
 504   const char *p;
 505
 506   /* name start char */
 507   p = name;
 508   if (G_UNLIKELY (IS_COMMON_NAME_END_CHAR (*p) ||
 509                   !(g_ascii_isalpha (*p) || *p == '_' || *p == ':')))
 510     goto slow_validate;
 511
 512   for (mask = *p++; *p != '\0'; p++)
 513     {
 514       mask |= *p;
 515
 516       /* is_name_char */
 517       if (G_UNLIKELY (!(g_ascii_isalnum (*p) ||
 518                         (!IS_COMMON_NAME_END_CHAR (*p) &&
 519                          (*p == '.' ||
 520                           *p == '-' ||
 521                           *p == '_' ||
 522                           *p == ':')))))
 523         goto slow_validate;
 524     }
 525
 526   if (mask & 0x80) /* un-common / non-ascii */
 527     goto slow_validate;
 528
 529   return TRUE;
 530
 531  slow_validate:
 532   return slow_name_validate (context, name, error);
 533 }
 534
 535 static gboolean
 536 text_validate (GMarkupParseContext  *context,
 537                const gchar          *p,
 538                gint                  len,
 539                GError              **error)
 540 {
 541   if (!g_utf8_validate (p, len, NULL))
 542     {
 543       set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
 544                  _("Invalid UTF-8 encoded text in name - not valid '%s'"), p);
 545       return FALSE;
 546     }
 547   else
 548     return TRUE;
 549 }
 550
 551 static gchar*
 552 char_str (gunichar c,
 553           gchar   *buf)
 554 {
 555   memset (buf, 0, 8);
 556   g_unichar_to_utf8 (c, buf);
 557   return buf;
 558 }
 559
 560 static gchar*
 561 utf8_str (const gchar *utf8,
 562           gchar       *buf)
 563 {
 564   char_str (g_utf8_get_char (utf8), buf);
 565   return buf;
 566 }
 567
 568 G_GNUC_PRINTF(5, 6)
 569 static void
 570 set_unescape_error (GMarkupParseContext  *context,
 571                     GError              **error,
 572                     const gchar          *remaining_text,
 573                     GMarkupError          code,
 574                     const gchar          *format,
 575                     ...)
 576 {
 577   GError *tmp_error;
 578   gchar *s;
 579   va_list args;
 580   gint remaining_newlines;
 581   const gchar *p;
 582
 583   remaining_newlines = 0;
 584   p = remaining_text;
 585   while (*p != '\0')
 586     {
 587       if (*p == '\n')
 588         ++remaining_newlines;
 589       ++p;
 590     }
 591
 592   va_start (args, format);
 593   s = g_strdup_vprintf (format, args);
 594   va_end (args);
 595
 596   tmp_error = g_error_new (G_MARKUP_ERROR,
 597                            code,
 598                            _("Error on line %d: %s"),
 599                            context->line_number - remaining_newlines,
 600                            s);
 601
 602   g_free (s);
 603
 604   mark_error (context, tmp_error);
 605
 606   g_propagate_error (error, tmp_error);
 607 }
 608
 609 /*
 610  * re-write the GString in-place, unescaping anything that escaped.
 611  * most XML does not contain entities, or escaping.
 612  */
 613 static gboolean
 614 unescape_gstring_inplace (GMarkupParseContext  *context,
 615                           GString              *string,
 616                           gboolean             *is_ascii,
 617                           GError              **error)
 618 {
 619   char mask, *to;
 620   const char *from;
 621   gboolean normalize_attribute;
 622
 623   *is_ascii = FALSE;
 624
 625   /* are we unescaping an attribute or not ? */
 626   if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
 627       context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
 628     normalize_attribute = TRUE;
 629   else
 630     normalize_attribute = FALSE;
 631
 632   /*
 633    * Meeks' theorem: unescaping can only shrink text.
 634    * for &lt; etc. this is obvious, for &#xffff; more
 635    * thought is required, but this is patently so.
 636    */
 637   mask = 0;
 638   for (from = to = string->str; *from != '\0'; from++, to++)
 639     {
 640       *to = *from;
 641
 642       mask |= *to;
 643       if (normalize_attribute && (*to == '\t' || *to == '\n'))
 644         *to = ' ';
 645       if (*to == '\r')
 646         {
 647           *to = normalize_attribute ? ' ' : '\n';
 648           if (from[1] == '\n')
 649             from++;
 650         }
 651       if (*from == '&')
 652         {
 653           from++;
 654           if (*from == '#')
 655             {
 656               gint base = 10;
 657               gulong l;
 658               gchar *end = NULL;
 659
 660               from++;
 661
 662               if (*from == 'x')
 663                 {
 664                   base = 16;
 665                   from++;
 666                 }
 667
 668               errno = 0;
 669               l = strtoul (from, &end, base);
 670
 671               if (end == from || errno != 0)
 672                 {
 673                   set_unescape_error (context, error,
 674                                       from, G_MARKUP_ERROR_PARSE,
 675                                       _("Failed to parse '%-.*s', which "
 676                                         "should have been a digit "
 677                                         "inside a character reference "
 678                                         "(&#234; for example) - perhaps "
 679                                         "the digit is too large"),
 680                                       (int)(end - from), from);
 681                   return FALSE;
 682                 }
 683               else if (*end != ';')
 684                 {
 685                   set_unescape_error (context, error,
 686                                       from, G_MARKUP_ERROR_PARSE,
 687                                       _("Character reference did not end with a "
 688                                         "semicolon; "
 689                                         "most likely you used an ampersand "
 690                                         "character without intending to start "
 691                                         "an entity - escape ampersand as &amp;"));
 692                   return FALSE;
 693                 }
 694               else
 695                 {
 696                   /* characters XML 1.1 permits */
 697                   if ((0 < l && l <= 0xD7FF) ||
 698                       (0xE000 <= l && l <= 0xFFFD) ||
 699                       (0x10000 <= l && l <= 0x10FFFF))
 700                     {
 701                       gchar buf[8];
 702                       char_str (l, buf);
 703                       strcpy (to, buf);
 704                       to += strlen (buf) - 1;
 705                       from = end;
 706                       if (l >= 0x80) /* not ascii */
 707                         mask |= 0x80;
 708                     }
 709                   else
 710                     {
 711                       set_unescape_error (context, error,
 712                                           from, G_MARKUP_ERROR_PARSE,
 713                                           _("Character reference '%-.*s' does not "
 714                                             "encode a permitted character"),
 715                                           (int)(end - from), from);
 716                       return FALSE;
 717                     }
 718                 }
 719             }
 720
 721           else if (strncmp (from, "lt;", 3) == 0)
 722             {
 723               *to = '<';
 724               from += 2;
 725             }
 726           else if (strncmp (from, "gt;", 3) == 0)
 727             {
 728               *to = '>';
 729               from += 2;
 730             }
 731           else if (strncmp (from, "amp;", 4) == 0)
 732             {
 733               *to = '&';
 734               from += 3;
 735             }
 736           else if (strncmp (from, "quot;", 5) == 0)
 737             {
 738               *to = '"';
 739               from += 4;
 740             }
 741           else if (strncmp (from, "apos;", 5) == 0)
 742             {
 743               *to = '\'';
 744               from += 4;
 745             }
 746           else
 747             {
 748               if (*from == ';')
 749                 set_unescape_error (context, error,
 750                                     from, G_MARKUP_ERROR_PARSE,
 751                                     _("Empty entity '&;' seen; valid "
 752                                       "entities are: &amp; &quot; &lt; &gt; &apos;"));
 753               else
 754                 {
 755                   const char *end = strchr (from, ';');
 756                   if (end)
 757                     set_unescape_error (context, error,
 758                                         from, G_MARKUP_ERROR_PARSE,
 759                                         _("Entity name '%-.*s' is not known"),
 760                                         (int)(end - from), from);
 761                   else
 762                     set_unescape_error (context, error,
 763                                         from, G_MARKUP_ERROR_PARSE,
 764                                         _("Entity did not end with a semicolon; "
 765                                           "most likely you used an ampersand "
 766                                           "character without intending to start "
 767                                           "an entity - escape ampersand as &amp;"));
 768                 }
 769               return FALSE;
 770             }
 771         }
 772     }
 773
 774   g_assert (to - string->str <= string->len);
 775   if (to - string->str != string->len)
 776     g_string_truncate (string, to - string->str);
 777
 778   *is_ascii = !(mask & 0x80);
 779
 780   return TRUE;
 781 }
 782
 783 static inline gboolean
 784 advance_char (GMarkupParseContext *context)
 785 {
 786   context->iter++;
 787   context->char_number++;
 788
 789   if (G_UNLIKELY (context->iter == context->current_text_end))
 790       return FALSE;
 791
 792   else if (G_UNLIKELY (*context->iter == '\n'))
 793     {
 794       context->line_number++;
 795       context->char_number = 1;
 796     }
 797
 798   return TRUE;
 799 }
 800
 801 static inline gboolean
 802 xml_isspace (char c)
 803 {
 804   return c == ' ' || c == '\t' || c == '\n' || c == '\r';
 805 }
 806
 807 static void
 808 skip_spaces (GMarkupParseContext *context)
 809 {
 810   do
 811     {
 812       if (!xml_isspace (*context->iter))
 813         return;
 814     }
 815   while (advance_char (context));
 816 }
 817
 818 static void
 819 advance_to_name_end (GMarkupParseContext *context)
 820 {
 821   do
 822     {
 823       if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
 824         return;
 825       if (xml_isspace (*(context->iter)))
 826         return;
 827     }
 828   while (advance_char (context));
 829 }
 830
 831 static void
 832 release_chunk (GMarkupParseContext *context, GString *str)
 833 {
 834   GSList *node;
 835   if (!str)
 836     return;
 837   if (str->allocated_len > 256)
 838     { /* large strings are unusual and worth freeing */
 839       g_string_free (str, TRUE);
 840       return;
 841     }
 842   string_blank (str);
 843   node = get_list_node (context, str);
 844   context->spare_chunks = g_slist_concat (node, context->spare_chunks);
 845 }
 846
 847 static void
 848 add_to_partial (GMarkupParseContext *context,
 849                 const gchar         *text_start,
 850                 const gchar         *text_end)
 851 {
 852   if (context->partial_chunk == NULL)
 853     { /* allocate a new chunk to parse into */
 854
 855       if (context->spare_chunks != NULL)
 856         {
 857           GSList *node = context->spare_chunks;
 858           context->spare_chunks = g_slist_remove_link (context->spare_chunks, node);
 859           context->partial_chunk = node->data;
 860           free_list_node (context, node);
 861         }
 862       else
 863         context->partial_chunk = g_string_sized_new (MAX (28, text_end - text_start));
 864     }
 865
 866   if (text_start != text_end)
 867     g_string_insert_len (context->partial_chunk, -1,
 868                          text_start, text_end - text_start);
 869 }
 870
 871 static inline void
 872 truncate_partial (GMarkupParseContext *context)
 873 {
 874   if (context->partial_chunk != NULL)
 875     string_blank (context->partial_chunk);
 876 }
 877
 878 static inline const gchar*
 879 current_element (GMarkupParseContext *context)
 880 {
 881   return context->tag_stack->data;
 882 }
 883
 884 static void
 885 pop_subparser_stack (GMarkupParseContext *context)
 886 {
 887   GMarkupRecursionTracker *tracker;
 888
 889   g_assert (context->subparser_stack);
 890
 891   tracker = context->subparser_stack->data;
 892
 893   context->awaiting_pop = TRUE;
 894   context->held_user_data = context->user_data;
 895
 896   context->user_data = tracker->prev_user_data;
 897   context->parser = tracker->prev_parser;
 898   context->subparser_element = tracker->prev_element;
 899   g_slice_free (GMarkupRecursionTracker, tracker);
 900
 901   context->subparser_stack = g_slist_delete_link (context->subparser_stack,
 902                                                   context->subparser_stack);
 903 }
 904
 905 static void
 906 push_partial_as_tag (GMarkupParseContext *context)
 907 {
 908   GString *str = context->partial_chunk;
 909   /* sadly, this is exported by gmarkup_get_element_stack as-is */
 910   context->tag_stack = g_slist_concat (get_list_node (context, str->str), context->tag_stack);
 911   context->tag_stack_gstr = g_slist_concat (get_list_node (context, str), context->tag_stack_gstr);
 912   context->partial_chunk = NULL;
 913 }
 914
 915 static void
 916 pop_tag (GMarkupParseContext *context)
 917 {
 918   GSList *nodea, *nodeb;
 919
 920   nodea = context->tag_stack;
 921   nodeb = context->tag_stack_gstr;
 922   release_chunk (context, nodeb->data);
 923   context->tag_stack = g_slist_remove_link (context->tag_stack, nodea);
 924   context->tag_stack_gstr = g_slist_remove_link (context->tag_stack_gstr, nodeb);
 925   free_list_node (context, nodea);
 926   free_list_node (context, nodeb);
 927 }
 928
 929 static void
 930 possibly_finish_subparser (GMarkupParseContext *context)
 931 {
 932   if (current_element (context) == context->subparser_element)
 933     pop_subparser_stack (context);
 934 }
 935
 936 static void
 937 ensure_no_outstanding_subparser (GMarkupParseContext *context)
 938 {
 939   if (context->awaiting_pop)
 940     g_critical ("During the first end_element call after invoking a "
 941                 "subparser you must pop the subparser stack and handle "
 942                 "the freeing of the subparser user_data.  This can be "
 943                 "done by calling the end function of the subparser.  "
 944                 "Very probably, your program just leaked memory.");
 945
 946   /* let valgrind watch the pointer disappear... */
 947   context->held_user_data = NULL;
 948   context->awaiting_pop = FALSE;
 949 }
 950
 951 static const gchar*
 952 current_attribute (GMarkupParseContext *context)
 953 {
 954   g_assert (context->cur_attr >= 0);
 955   return context->attr_names[context->cur_attr]->str;
 956 }
 957
 958 static void
 959 add_attribute (GMarkupParseContext *context, GString *str)
 960 {
 961   if (context->cur_attr + 2 >= context->alloc_attrs)
 962     {
 963       context->alloc_attrs += 5; /* silly magic number */
 964       context->attr_names = g_realloc (context->attr_names, sizeof(GString*)*context->alloc_attrs);
 965       context->attr_values = g_realloc (context->attr_values, sizeof(GString*)*context->alloc_attrs);
 966     }
 967   context->cur_attr++;
 968   context->attr_names[context->cur_attr] = str;
 969   context->attr_values[context->cur_attr] = NULL;
 970   context->attr_names[context->cur_attr+1] = NULL;
 971   context->attr_values[context->cur_attr+1] = NULL;
 972 }
 973
 974 static void
 975 clear_attributes (GMarkupParseContext *context)
 976 {
 977   /* Go ahead and free the attributes. */
 978   for (; context->cur_attr >= 0; context->cur_attr--)
 979     {
 980       int pos = context->cur_attr;
 981       release_chunk (context, context->attr_names[pos]);
 982       release_chunk (context, context->attr_values[pos]);
 983       context->attr_names[pos] = context->attr_values[pos] = NULL;
 984     }
 985   g_assert (context->cur_attr == -1);
 986   g_assert (context->attr_names == NULL ||
 987             context->attr_names[0] == NULL);
 988   g_assert (context->attr_values == NULL ||
 989             context->attr_values[0] == NULL);
 990 }
 991
 992 /* This has to be a separate function to ensure the alloca's
 993  * are unwound on exit - otherwise we grow & blow the stack
 994  * with large documents
 995  */
 996 static inline void
 997 emit_start_element (GMarkupParseContext  *context,
 998                     GError              **error)
 999 {
1000   int i, j = 0;
1001   const gchar *start_name;
1002   const gchar **attr_names;
1003   const gchar **attr_values;
1004   GError *tmp_error;
1005
1006   /* In case we want to ignore qualified tags and we see that we have
1007    * one here, we push a subparser.  This will ignore all tags inside of
1008    * the qualified tag.
1009    *
1010    * We deal with the end of the subparser from emit_end_element.
1011    */
1012   if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
1013     {
1014       static const GMarkupParser ignore_parser;
1015       g_markup_parse_context_push (context, &ignore_parser, NULL);
1016       clear_attributes (context);
1017       return;
1018     }
1019
1020   attr_names = g_newa (const gchar *, context->cur_attr + 2);
1021   attr_values = g_newa (const gchar *, context->cur_attr + 2);
1022   for (i = 0; i < context->cur_attr + 1; i++)
1023     {
1024       /* Possibly omit qualified attribute names from the list */
1025       if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (context->attr_names[i]->str, ':'))
1026         continue;
1027
1028       attr_names[j] = context->attr_names[i]->str;
1029       attr_values[j] = context->attr_values[i]->str;
1030       j++;
1031     }
1032   attr_names[j] = NULL;
1033   attr_values[j] = NULL;
1034
1035   /* Call user callback for element start */
1036   tmp_error = NULL;
1037   start_name = current_element (context);
1038
1039   if (context->parser->start_element &&
1040       name_validate (context, start_name, error))
1041     (* context->parser->start_element) (context,
1042                                         start_name,
1043                                         (const gchar **)attr_names,
1044                                         (const gchar **)attr_values,
1045                                         context->user_data,
1046                                         &tmp_error);
1047   clear_attributes (context);
1048
1049   if (tmp_error != NULL)
1050     propagate_error (context, error, tmp_error);
1051 }
1052
1053 static void
1054 emit_end_element (GMarkupParseContext  *context,
1055                   GError              **error)
1056 {
1057   /* We need to pop the tag stack and call the end_element
1058    * function, since this is the close tag
1059    */
1060   GError *tmp_error = NULL;
1061
1062   g_assert (context->tag_stack != NULL);
1063
1064   possibly_finish_subparser (context);
1065
1066   /* We might have just returned from our ignore subparser */
1067   if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
1068     {
1069       g_markup_parse_context_pop (context);
1070       pop_tag (context);
1071       return;
1072     }
1073
1074   tmp_error = NULL;
1075   if (context->parser->end_element)
1076     (* context->parser->end_element) (context,
1077                                       current_element (context),
1078                                       context->user_data,
1079                                       &tmp_error);
1080
1081   ensure_no_outstanding_subparser (context);
1082
1083   if (tmp_error)
1084     {
1085       mark_error (context, tmp_error);
1086       g_propagate_error (error, tmp_error);
1087     }
1088
1089   pop_tag (context);
1090 }
1091
1092 /**
1093  * g_markup_parse_context_parse:
1094  * @context: a #GMarkupParseContext
1095  * @text: chunk of text to parse
1096  * @text_len: length of @text in bytes
1097  * @error: return location for a #GError
1098  *
1099  * Feed some data to the #GMarkupParseContext.
1100  *
1101  * The data need not be valid UTF-8; an error will be signaled if
1102  * it's invalid. The data need not be an entire document; you can
1103  * feed a document into the parser incrementally, via multiple calls
1104  * to this function. Typically, as you receive data from a network
1105  * connection or file, you feed each received chunk of data into this
1106  * function, aborting the process if an error occurs. Once an error
1107  * is reported, no further data may be fed to the #GMarkupParseContext;
1108  * all errors are fatal.
1109  *
1110  * Returns: %FALSE if an error occurred, %TRUE on success
1111  */
1112 gboolean
1113 g_markup_parse_context_parse (GMarkupParseContext  *context,
1114                               const gchar          *text,
1115                               gssize                text_len,
1116                               GError              **error)
1117 {
1118   g_return_val_if_fail (context != NULL, FALSE);
1119   g_return_val_if_fail (text != NULL, FALSE);
1120   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1121   g_return_val_if_fail (!context->parsing, FALSE);
1122
1123   if (text_len < 0)
1124     text_len = strlen (text);
1125
1126   if (text_len == 0)
1127     return TRUE;
1128
1129   context->parsing = TRUE;
1130
1131
1132   context->current_text = text;
1133   context->current_text_len = text_len;
1134   context->current_text_end = context->current_text + text_len;
1135   context->iter = context->current_text;
1136   context->start = context->iter;
1137
1138   while (context->iter != context->current_text_end)
1139     {
1140       switch (context->state)
1141         {
1142         case STATE_START:
1143           /* Possible next state: AFTER_OPEN_ANGLE */
1144
1145           g_assert (context->tag_stack == NULL);
1146
1147           /* whitespace is ignored outside of any elements */
1148           skip_spaces (context);
1149
1150           if (context->iter != context->current_text_end)
1151             {
1152               if (*context->iter == '<')
1153                 {
1154                   /* Move after the open angle */
1155                   advance_char (context);
1156
1157                   context->state = STATE_AFTER_OPEN_ANGLE;
1158
1159                   /* this could start a passthrough */
1160                   context->start = context->iter;
1161
1162                   /* document is now non-empty */
1163                   context->document_empty = FALSE;
1164                 }
1165               else
1166                 {
1167                   set_error_literal (context,
1168                                      error,
1169                                      G_MARKUP_ERROR_PARSE,
1170                                      _("Document must begin with an element (e.g. <book>)"));
1171                 }
1172             }
1173           break;
1174
1175         case STATE_AFTER_OPEN_ANGLE:
1176           /* Possible next states: INSIDE_OPEN_TAG_NAME,
1177            *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1178            */
1179           if (*context->iter == '?' ||
1180               *context->iter == '!')
1181             {
1182               /* include < in the passthrough */
1183               const gchar *openangle = "<";
1184               add_to_partial (context, openangle, openangle + 1);
1185               context->start = context->iter;
1186               context->balance = 1;
1187               context->state = STATE_INSIDE_PASSTHROUGH;
1188             }
1189           else if (*context->iter == '/')
1190             {
1191               /* move after it */
1192               advance_char (context);
1193
1194               context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1195             }
1196           else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1197             {
1198               context->state = STATE_INSIDE_OPEN_TAG_NAME;
1199
1200               /* start of tag name */
1201               context->start = context->iter;
1202             }
1203           else
1204             {
1205               gchar buf[8];
1206
1207               set_error (context,
1208                          error,
1209                          G_MARKUP_ERROR_PARSE,
1210                          _("'%s' is not a valid character following "
1211                            "a '<' character; it may not begin an "
1212                            "element name"),
1213                          utf8_str (context->iter, buf));
1214             }
1215           break;
1216
1217           /* The AFTER_CLOSE_ANGLE state is actually sort of
1218            * broken, because it doesn't correspond to a range
1219            * of characters in the input stream as the others do,
1220            * and thus makes things harder to conceptualize
1221            */
1222         case STATE_AFTER_CLOSE_ANGLE:
1223           /* Possible next states: INSIDE_TEXT, STATE_START */
1224           if (context->tag_stack == NULL)
1225             {
1226               context->start = NULL;
1227               context->state = STATE_START;
1228             }
1229           else
1230             {
1231               context->start = context->iter;
1232               context->state = STATE_INSIDE_TEXT;
1233             }
1234           break;
1235
1236         case STATE_AFTER_ELISION_SLASH:
1237           /* Possible next state: AFTER_CLOSE_ANGLE */
1238           if (*context->iter == '>')
1239             {
1240               /* move after the close angle */
1241               advance_char (context);
1242               context->state = STATE_AFTER_CLOSE_ANGLE;
1243               emit_end_element (context, error);
1244             }
1245           else
1246             {
1247               gchar buf[8];
1248
1249               set_error (context,
1250                          error,
1251                          G_MARKUP_ERROR_PARSE,
1252                          _("Odd character '%s', expected a '>' character "
1253                            "to end the empty-element tag '%s'"),
1254                          utf8_str (context->iter, buf),
1255                          current_element (context));
1256             }
1257           break;
1258
1259         case STATE_INSIDE_OPEN_TAG_NAME:
1260           /* Possible next states: BETWEEN_ATTRIBUTES */
1261
1262           /* if there's a partial chunk then it's the first part of the
1263            * tag name. If there's a context->start then it's the start
1264            * of the tag name in current_text, the partial chunk goes
1265            * before that start though.
1266            */
1267           advance_to_name_end (context);
1268
1269           if (context->iter == context->current_text_end)
1270             {
1271               /* The name hasn't necessarily ended. Merge with
1272                * partial chunk, leave state unchanged.
1273                */
1274               add_to_partial (context, context->start, context->iter);
1275             }
1276           else
1277             {
1278               /* The name has ended. Combine it with the partial chunk
1279                * if any; push it on the stack; enter next state.
1280                */
1281               add_to_partial (context, context->start, context->iter);
1282               push_partial_as_tag (context);
1283
1284               context->state = STATE_BETWEEN_ATTRIBUTES;
1285               context->start = NULL;
1286             }
1287           break;
1288
1289         case STATE_INSIDE_ATTRIBUTE_NAME:
1290           /* Possible next states: AFTER_ATTRIBUTE_NAME */
1291
1292           advance_to_name_end (context);
1293           add_to_partial (context, context->start, context->iter);
1294
1295           /* read the full name, if we enter the equals sign state
1296            * then add the attribute to the list (without the value),
1297            * otherwise store a partial chunk to be prepended later.
1298            */
1299           if (context->iter != context->current_text_end)
1300             context->state = STATE_AFTER_ATTRIBUTE_NAME;
1301           break;
1302
1303         case STATE_AFTER_ATTRIBUTE_NAME:
1304           /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1305
1306           skip_spaces (context);
1307
1308           if (context->iter != context->current_text_end)
1309             {
1310               /* The name has ended. Combine it with the partial chunk
1311                * if any; push it on the stack; enter next state.
1312                */
1313               if (!name_validate (context, context->partial_chunk->str, error))
1314                 break;
1315
1316               add_attribute (context, context->partial_chunk);
1317
1318               context->partial_chunk = NULL;
1319               context->start = NULL;
1320
1321               if (*context->iter == '=')
1322                 {
1323                   advance_char (context);
1324                   context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1325                 }
1326               else
1327                 {
1328                   gchar buf[8];
1329
1330                   set_error (context,
1331                              error,
1332                              G_MARKUP_ERROR_PARSE,
1333                              _("Odd character '%s', expected a '=' after "
1334                                "attribute name '%s' of element '%s'"),
1335                              utf8_str (context->iter, buf),
1336                              current_attribute (context),
1337                              current_element (context));
1338
1339                 }
1340             }
1341           break;
1342
1343         case STATE_BETWEEN_ATTRIBUTES:
1344           /* Possible next states: AFTER_CLOSE_ANGLE,
1345            * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1346            */
1347           skip_spaces (context);
1348
1349           if (context->iter != context->current_text_end)
1350             {
1351               if (*context->iter == '/')
1352                 {
1353                   advance_char (context);
1354                   context->state = STATE_AFTER_ELISION_SLASH;
1355                 }
1356               else if (*context->iter == '>')
1357                 {
1358                   advance_char (context);
1359                   context->state = STATE_AFTER_CLOSE_ANGLE;
1360                 }
1361               else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1362                 {
1363                   context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1364                   /* start of attribute name */
1365                   context->start = context->iter;
1366                 }
1367               else
1368                 {
1369                   gchar buf[8];
1370
1371                   set_error (context,
1372                              error,
1373                              G_MARKUP_ERROR_PARSE,
1374                              _("Odd character '%s', expected a '>' or '/' "
1375                                "character to end the start tag of "
1376                                "element '%s', or optionally an attribute; "
1377                                "perhaps you used an invalid character in "
1378                                "an attribute name"),
1379                              utf8_str (context->iter, buf),
1380                              current_element (context));
1381                 }
1382
1383               /* If we're done with attributes, invoke
1384                * the start_element callback
1385                */
1386               if (context->state == STATE_AFTER_ELISION_SLASH ||
1387                   context->state == STATE_AFTER_CLOSE_ANGLE)
1388                 emit_start_element (context, error);
1389             }
1390           break;
1391
1392         case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1393           /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1394
1395           skip_spaces (context);
1396
1397           if (context->iter != context->current_text_end)
1398             {
1399               if (*context->iter == '"')
1400                 {
1401                   advance_char (context);
1402                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1403                   context->start = context->iter;
1404                 }
1405               else if (*context->iter == '\'')
1406                 {
1407                   advance_char (context);
1408                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1409                   context->start = context->iter;
1410                 }
1411               else
1412                 {
1413                   gchar buf[8];
1414
1415                   set_error (context,
1416                              error,
1417                              G_MARKUP_ERROR_PARSE,
1418                              _("Odd character '%s', expected an open quote mark "
1419                                "after the equals sign when giving value for "
1420                                "attribute '%s' of element '%s'"),
1421                              utf8_str (context->iter, buf),
1422                              current_attribute (context),
1423                              current_element (context));
1424                 }
1425             }
1426           break;
1427
1428         case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1429         case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1430           /* Possible next states: BETWEEN_ATTRIBUTES */
1431           {
1432             gchar delim;
1433
1434             if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1435               {
1436                 delim = '\'';
1437               }
1438             else
1439               {
1440                 delim = '"';
1441               }
1442
1443             do
1444               {
1445                 if (*context->iter == delim)
1446                   break;
1447               }
1448             while (advance_char (context));
1449           }
1450           if (context->iter == context->current_text_end)
1451             {
1452               /* The value hasn't necessarily ended. Merge with
1453                * partial chunk, leave state unchanged.
1454                */
1455               add_to_partial (context, context->start, context->iter);
1456             }
1457           else
1458             {
1459               gboolean is_ascii;
1460               /* The value has ended at the quote mark. Combine it
1461                * with the partial chunk if any; set it for the current
1462                * attribute.
1463                */
1464               add_to_partial (context, context->start, context->iter);
1465
1466               g_assert (context->cur_attr >= 0);
1467
1468               if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1469                   (is_ascii || text_validate (context, context->partial_chunk->str,
1470                                               context->partial_chunk->len, error)))
1471                 {
1472                   /* success, advance past quote and set state. */
1473                   context->attr_values[context->cur_attr] = context->partial_chunk;
1474                   context->partial_chunk = NULL;
1475                   advance_char (context);
1476                   context->state = STATE_BETWEEN_ATTRIBUTES;
1477                   context->start = NULL;
1478                 }
1479
1480               truncate_partial (context);
1481             }
1482           break;
1483
1484         case STATE_INSIDE_TEXT:
1485           /* Possible next states: AFTER_OPEN_ANGLE */
1486           do
1487             {
1488               if (*context->iter == '<')
1489                 break;
1490             }
1491           while (advance_char (context));
1492
1493           /* The text hasn't necessarily ended. Merge with
1494            * partial chunk, leave state unchanged.
1495            */
1496
1497           add_to_partial (context, context->start, context->iter);
1498
1499           if (context->iter != context->current_text_end)
1500             {
1501               gboolean is_ascii;
1502
1503               /* The text has ended at the open angle. Call the text
1504                * callback.
1505                */
1506               if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1507                   (is_ascii || text_validate (context, context->partial_chunk->str,
1508                                               context->partial_chunk->len, error)))
1509                 {
1510                   GError *tmp_error = NULL;
1511
1512                   if (context->parser->text)
1513                     (*context->parser->text) (context,
1514                                               context->partial_chunk->str,
1515                                               context->partial_chunk->len,
1516                                               context->user_data,
1517                                               &tmp_error);
1518
1519                   if (tmp_error == NULL)
1520                     {
1521                       /* advance past open angle and set state. */
1522                       advance_char (context);
1523                       context->state = STATE_AFTER_OPEN_ANGLE;
1524                       /* could begin a passthrough */
1525                       context->start = context->iter;
1526                     }
1527                   else
1528                     propagate_error (context, error, tmp_error);
1529                 }
1530
1531               truncate_partial (context);
1532             }
1533           break;
1534
1535         case STATE_AFTER_CLOSE_TAG_SLASH:
1536           /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1537           if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1538             {
1539               context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1540
1541               /* start of tag name */
1542               context->start = context->iter;
1543             }
1544           else
1545             {
1546               gchar buf[8];
1547
1548               set_error (context,
1549                          error,
1550                          G_MARKUP_ERROR_PARSE,
1551                          _("'%s' is not a valid character following "
1552                            "the characters '</'; '%s' may not begin an "
1553                            "element name"),
1554                          utf8_str (context->iter, buf),
1555                          utf8_str (context->iter, buf));
1556             }
1557           break;
1558
1559         case STATE_INSIDE_CLOSE_TAG_NAME:
1560           /* Possible next state: AFTER_CLOSE_TAG_NAME */
1561           advance_to_name_end (context);
1562           add_to_partial (context, context->start, context->iter);
1563
1564           if (context->iter != context->current_text_end)
1565             context->state = STATE_AFTER_CLOSE_TAG_NAME;
1566           break;
1567
1568         case STATE_AFTER_CLOSE_TAG_NAME:
1569           /* Possible next state: AFTER_CLOSE_TAG_SLASH */
1570
1571           skip_spaces (context);
1572
1573           if (context->iter != context->current_text_end)
1574             {
1575               GString *close_name;
1576
1577               close_name = context->partial_chunk;
1578               context->partial_chunk = NULL;
1579
1580               if (*context->iter != '>')
1581                 {
1582                   gchar buf[8];
1583
1584                   set_error (context,
1585                              error,
1586                              G_MARKUP_ERROR_PARSE,
1587                              _("'%s' is not a valid character following "
1588                                "the close element name '%s'; the allowed "
1589                                "character is '>'"),
1590                              utf8_str (context->iter, buf),
1591                              close_name->str);
1592                 }
1593               else if (context->tag_stack == NULL)
1594                 {
1595                   set_error (context,
1596                              error,
1597                              G_MARKUP_ERROR_PARSE,
1598                              _("Element '%s' was closed, no element "
1599                                "is currently open"),
1600                              close_name->str);
1601                 }
1602               else if (strcmp (close_name->str, current_element (context)) != 0)
1603                 {
1604                   set_error (context,
1605                              error,
1606                              G_MARKUP_ERROR_PARSE,
1607                              _("Element '%s' was closed, but the currently "
1608                                "open element is '%s'"),
1609                              close_name->str,
1610                              current_element (context));
1611                 }
1612               else
1613                 {
1614                   advance_char (context);
1615                   context->state = STATE_AFTER_CLOSE_ANGLE;
1616                   context->start = NULL;
1617
1618                   emit_end_element (context, error);
1619                 }
1620               context->partial_chunk = close_name;
1621               truncate_partial (context);
1622             }
1623           break;
1624
1625         case STATE_INSIDE_PASSTHROUGH:
1626           /* Possible next state: AFTER_CLOSE_ANGLE */
1627           do
1628             {
1629               if (*context->iter == '<')
1630                 context->balance++;
1631               if (*context->iter == '>')
1632                 {
1633                   gchar *str;
1634                   gsize len;
1635
1636                   context->balance--;
1637                   add_to_partial (context, context->start, context->iter);
1638                   context->start = context->iter;
1639
1640                   str = context->partial_chunk->str;
1641                   len = context->partial_chunk->len;
1642
1643                   if (str[1] == '?' && str[len - 1] == '?')
1644                     break;
1645                   if (strncmp (str, "<!--", 4) == 0 &&
1646                       strcmp (str + len - 2, "--") == 0)
1647                     break;
1648                   if (strncmp (str, "<![CDATA[", 9) == 0 &&
1649                       strcmp (str + len - 2, "]]") == 0)
1650                     break;
1651                   if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
1652                       context->balance == 0)
1653                     break;
1654                 }
1655             }
1656           while (advance_char (context));
1657
1658           if (context->iter == context->current_text_end)
1659             {
1660               /* The passthrough hasn't necessarily ended. Merge with
1661                * partial chunk, leave state unchanged.
1662                */
1663                add_to_partial (context, context->start, context->iter);
1664             }
1665           else
1666             {
1667               /* The passthrough has ended at the close angle. Combine
1668                * it with the partial chunk if any. Call the passthrough
1669                * callback. Note that the open/close angles are
1670                * included in the text of the passthrough.
1671                */
1672               GError *tmp_error = NULL;
1673
1674               advance_char (context); /* advance past close angle */
1675               add_to_partial (context, context->start, context->iter);
1676
1677               if (context->flags & G_MARKUP_TREAT_CDATA_AS_TEXT &&
1678                   strncmp (context->partial_chunk->str, "<![CDATA[", 9) == 0)
1679                 {
1680                   if (context->parser->text &&
1681                       text_validate (context,
1682                                      context->partial_chunk->str + 9,
1683                                      context->partial_chunk->len - 12,
1684                                      error))
1685                     (*context->parser->text) (context,
1686                                               context->partial_chunk->str + 9,
1687                                               context->partial_chunk->len - 12,
1688                                               context->user_data,
1689                                               &tmp_error);
1690                 }
1691               else if (context->parser->passthrough &&
1692                        text_validate (context,
1693                                       context->partial_chunk->str,
1694                                       context->partial_chunk->len,
1695                                       error))
1696                 (*context->parser->passthrough) (context,
1697                                                  context->partial_chunk->str,
1698                                                  context->partial_chunk->len,
1699                                                  context->user_data,
1700                                                  &tmp_error);
1701
1702               truncate_partial (context);
1703
1704               if (tmp_error == NULL)
1705                 {
1706                   context->state = STATE_AFTER_CLOSE_ANGLE;
1707                   context->start = context->iter; /* could begin text */
1708                 }
1709               else
1710                 propagate_error (context, error, tmp_error);
1711             }
1712           break;
1713
1714         case STATE_ERROR:
1715           goto finished;
1716           break;
1717
1718         default:
1719           g_assert_not_reached ();
1720           break;
1721         }
1722     }
1723
1724  finished:
1725   context->parsing = FALSE;
1726
1727   return context->state != STATE_ERROR;
1728 }
1729
1730 /**
1731  * g_markup_parse_context_end_parse:
1732  * @context: a #GMarkupParseContext
1733  * @error: return location for a #GError
1734  *
1735  * Signals to the #GMarkupParseContext that all data has been
1736  * fed into the parse context with g_markup_parse_context_parse().
1737  *
1738  * This function reports an error if the document isn't complete,
1739  * for example if elements are still open.
1740  *
1741  * Returns: %TRUE on success, %FALSE if an error was set
1742  */
1743 gboolean
1744 g_markup_parse_context_end_parse (GMarkupParseContext  *context,
1745                                   GError              **error)
1746 {
1747   g_return_val_if_fail (context != NULL, FALSE);
1748   g_return_val_if_fail (!context->parsing, FALSE);
1749   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1750
1751   if (context->partial_chunk != NULL)
1752     {
1753       g_string_free (context->partial_chunk, TRUE);
1754       context->partial_chunk = NULL;
1755     }
1756
1757   if (context->document_empty)
1758     {
1759       set_error_literal (context, error, G_MARKUP_ERROR_EMPTY,
1760                          _("Document was empty or contained only whitespace"));
1761       return FALSE;
1762     }
1763
1764   context->parsing = TRUE;
1765
1766   switch (context->state)
1767     {
1768     case STATE_START:
1769       /* Nothing to do */
1770       break;
1771
1772     case STATE_AFTER_OPEN_ANGLE:
1773       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1774                          _("Document ended unexpectedly just after an open angle bracket '<'"));
1775       break;
1776
1777     case STATE_AFTER_CLOSE_ANGLE:
1778       if (context->tag_stack != NULL)
1779         {
1780           /* Error message the same as for INSIDE_TEXT */
1781           set_error (context, error, G_MARKUP_ERROR_PARSE,
1782                      _("Document ended unexpectedly with elements still open - "
1783                        "'%s' was the last element opened"),
1784                      current_element (context));
1785         }
1786       break;
1787
1788     case STATE_AFTER_ELISION_SLASH:
1789       set_error (context, error, G_MARKUP_ERROR_PARSE,
1790                  _("Document ended unexpectedly, expected to see a close angle "
1791                    "bracket ending the tag <%s/>"), current_element (context));
1792       break;
1793
1794     case STATE_INSIDE_OPEN_TAG_NAME:
1795       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1796                          _("Document ended unexpectedly inside an element name"));
1797       break;
1798
1799     case STATE_INSIDE_ATTRIBUTE_NAME:
1800     case STATE_AFTER_ATTRIBUTE_NAME:
1801       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1802                          _("Document ended unexpectedly inside an attribute name"));
1803       break;
1804
1805     case STATE_BETWEEN_ATTRIBUTES:
1806       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1807                          _("Document ended unexpectedly inside an element-opening "
1808                            "tag."));
1809       break;
1810
1811     case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1812       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1813                          _("Document ended unexpectedly after the equals sign "
1814                            "following an attribute name; no attribute value"));
1815       break;
1816
1817     case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1818     case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1819       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1820                          _("Document ended unexpectedly while inside an attribute "
1821                            "value"));
1822       break;
1823
1824     case STATE_INSIDE_TEXT:
1825       g_assert (context->tag_stack != NULL);
1826       set_error (context, error, G_MARKUP_ERROR_PARSE,
1827                  _("Document ended unexpectedly with elements still open - "
1828                    "'%s' was the last element opened"),
1829                  current_element (context));
1830       break;
1831
1832     case STATE_AFTER_CLOSE_TAG_SLASH:
1833     case STATE_INSIDE_CLOSE_TAG_NAME:
1834     case STATE_AFTER_CLOSE_TAG_NAME:
1835       set_error (context, error, G_MARKUP_ERROR_PARSE,
1836                  _("Document ended unexpectedly inside the close tag for "
1837                    "element '%s'"), current_element (context));
1838       break;
1839
1840     case STATE_INSIDE_PASSTHROUGH:
1841       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1842                          _("Document ended unexpectedly inside a comment or "
1843                            "processing instruction"));
1844       break;
1845
1846     case STATE_ERROR:
1847     default:
1848       g_assert_not_reached ();
1849       break;
1850     }
1851
1852   context->parsing = FALSE;
1853
1854   return context->state != STATE_ERROR;
1855 }
1856
1857 /**
1858  * g_markup_parse_context_get_element:
1859  * @context: a #GMarkupParseContext
1860  *
1861  * Retrieves the name of the currently open element.
1862  *
1863  * If called from the start_element or end_element handlers this will
1864  * give the element_name as passed to those functions. For the parent
1865  * elements, see g_markup_parse_context_get_element_stack().
1866  *
1867  * Returns: the name of the currently open element, or %NULL
1868  *
1869  * Since: 2.2
1870  */
1871 const gchar *
1872 g_markup_parse_context_get_element (GMarkupParseContext *context)
1873 {
1874   g_return_val_if_fail (context != NULL, NULL);
1875
1876   if (context->tag_stack == NULL)
1877     return NULL;
1878   else
1879     return current_element (context);
1880 }
1881
1882 /**
1883  * g_markup_parse_context_get_element_stack:
1884  * @context: a #GMarkupParseContext
1885  *
1886  * Retrieves the element stack from the internal state of the parser.
1887  *
1888  * The returned #GSList is a list of strings where the first item is
1889  * the currently open tag (as would be returned by
1890  * g_markup_parse_context_get_element()) and the next item is its
1891  * immediate parent.
1892  *
1893  * This function is intended to be used in the start_element and
1894  * end_element handlers where g_markup_parse_context_get_element()
1895  * would merely return the name of the element that is being
1896  * processed.
1897  *
1898  * Returns: the element stack, which must not be modified
1899  *
1900  * Since: 2.16
1901  */
1902 const GSList *
1903 g_markup_parse_context_get_element_stack (GMarkupParseContext *context)
1904 {
1905   g_return_val_if_fail (context != NULL, NULL);
1906   return context->tag_stack;
1907 }
1908
1909 /**
1910  * g_markup_parse_context_get_position:
1911  * @context: a #GMarkupParseContext
1912  * @line_number: (nullable): return location for a line number, or %NULL
1913  * @char_number: (nullable): return location for a char-on-line number, or %NULL
1914  *
1915  * Retrieves the current line number and the number of the character on
1916  * that line. Intended for use in error messages; there are no strict
1917  * semantics for what constitutes the "current" line number other than
1918  * "the best number we could come up with for error messages."
1919  */
1920 void
1921 g_markup_parse_context_get_position (GMarkupParseContext *context,
1922                                      gint                *line_number,
1923                                      gint                *char_number)
1924 {
1925   g_return_if_fail (context != NULL);
1926
1927   if (line_number)
1928     *line_number = context->line_number;
1929
1930   if (char_number)
1931     *char_number = context->char_number;
1932 }
1933
1934 /**
1935  * g_markup_parse_context_get_user_data:
1936  * @context: a #GMarkupParseContext
1937  *
1938  * Returns the user_data associated with @context.
1939  *
1940  * This will either be the user_data that was provided to
1941  * g_markup_parse_context_new() or to the most recent call
1942  * of g_markup_parse_context_push().
1943  *
1944  * Returns: the provided user_data. The returned data belongs to
1945  *     the markup context and will be freed when
1946  *     g_markup_parse_context_free() is called.
1947  *
1948  * Since: 2.18
1949  */
1950 gpointer
1951 g_markup_parse_context_get_user_data (GMarkupParseContext *context)
1952 {
1953   return context->user_data;
1954 }
1955
1956 /**
1957  * g_markup_parse_context_push:
1958  * @context: a #GMarkupParseContext
1959  * @parser: a #GMarkupParser
1960  * @user_data: user data to pass to #GMarkupParser functions
1961  *
1962  * Temporarily redirects markup data to a sub-parser.
1963  *
1964  * This function may only be called from the start_element handler of
1965  * a #GMarkupParser. It must be matched with a corresponding call to
1966  * g_markup_parse_context_pop() in the matching end_element handler
1967  * (except in the case that the parser aborts due to an error).
1968  *
1969  * All tags, text and other data between the matching tags is
1970  * redirected to the subparser given by @parser. @user_data is used
1971  * as the user_data for that parser. @user_data is also passed to the
1972  * error callback in the event that an error occurs. This includes
1973  * errors that occur in subparsers of the subparser.
1974  *
1975  * The end tag matching the start tag for which this call was made is
1976  * handled by the previous parser (which is given its own user_data)
1977  * which is why g_markup_parse_context_pop() is provided to allow "one
1978  * last access" to the @user_data provided to this function. In the
1979  * case of error, the @user_data provided here is passed directly to
1980  * the error callback of the subparser and g_markup_parse_context_pop()
1981  * should not be called. In either case, if @user_data was allocated
1982  * then it ought to be freed from both of these locations.
1983  *
1984  * This function is not intended to be directly called by users
1985  * interested in invoking subparsers. Instead, it is intended to be
1986  * used by the subparsers themselves to implement a higher-level
1987  * interface.
1988  *
1989  * As an example, see the following implementation of a simple
1990  * parser that counts the number of tags encountered.
1991  *
1992  * |[<!-- language="C" -->
1993  * typedef struct
1994  * {
1995  *   gint tag_count;
1996  * } CounterData;
1997  *
1998  * static void
1999  * counter_start_element (GMarkupParseContext  *context,
2000  *                        const gchar          *element_name,
2001  *                        const gchar         **attribute_names,
2002  *                        const gchar         **attribute_values,
2003  *                        gpointer              user_data,
2004  *                        GError              **error)
2005  * {
2006  *   CounterData *data = user_data;
2007  *
2008  *   data->tag_count++;
2009  * }
2010  *
2011  * static void
2012  * counter_error (GMarkupParseContext *context,
2013  *                GError              *error,
2014  *                gpointer             user_data)
2015  * {
2016  *   CounterData *data = user_data;
2017  *
2018  *   g_slice_free (CounterData, data);
2019  * }
2020  *
2021  * static GMarkupParser counter_subparser =
2022  * {
2023  *   counter_start_element,
2024  *   NULL,
2025  *   NULL,
2026  *   NULL,
2027  *   counter_error
2028  * };
2029  * ]|
2030  *
2031  * In order to allow this parser to be easily used as a subparser, the
2032  * following interface is provided:
2033  *
2034  * |[<!-- language="C" -->
2035  * void
2036  * start_counting (GMarkupParseContext *context)
2037  * {
2038  *   CounterData *data = g_slice_new (CounterData);
2039  *
2040  *   data->tag_count = 0;
2041  *   g_markup_parse_context_push (context, &counter_subparser, data);
2042  * }
2043  *
2044  * gint
2045  * end_counting (GMarkupParseContext *context)
2046  * {
2047  *   CounterData *data = g_markup_parse_context_pop (context);
2048  *   int result;
2049  *
2050  *   result = data->tag_count;
2051  *   g_slice_free (CounterData, data);
2052  *
2053  *   return result;
2054  * }
2055  * ]|
2056  *
2057  * The subparser would then be used as follows:
2058  *
2059  * |[<!-- language="C" -->
2060  * static void start_element (context, element_name, ...)
2061  * {
2062  *   if (strcmp (element_name, "count-these") == 0)
2063  *     start_counting (context);
2064  *
2065  *   // else, handle other tags...
2066  * }
2067  *
2068  * static void end_element (context, element_name, ...)
2069  * {
2070  *   if (strcmp (element_name, "count-these") == 0)
2071  *     g_print ("Counted %d tags\n", end_counting (context));
2072  *
2073  *   // else, handle other tags...
2074  * }
2075  * ]|
2076  *
2077  * Since: 2.18
2078  **/
2079 void
2080 g_markup_parse_context_push (GMarkupParseContext *context,
2081                              const GMarkupParser *parser,
2082                              gpointer             user_data)
2083 {
2084   GMarkupRecursionTracker *tracker;
2085
2086   tracker = g_slice_new (GMarkupRecursionTracker);
2087   tracker->prev_element = context->subparser_element;
2088   tracker->prev_parser = context->parser;
2089   tracker->prev_user_data = context->user_data;
2090
2091   context->subparser_element = current_element (context);
2092   context->parser = parser;
2093   context->user_data = user_data;
2094
2095   context->subparser_stack = g_slist_prepend (context->subparser_stack,
2096                                               tracker);
2097 }
2098
2099 /**
2100  * g_markup_parse_context_pop:
2101  * @context: a #GMarkupParseContext
2102  *
2103  * Completes the process of a temporary sub-parser redirection.
2104  *
2105  * This function exists to collect the user_data allocated by a
2106  * matching call to g_markup_parse_context_push(). It must be called
2107  * in the end_element handler corresponding to the start_element
2108  * handler during which g_markup_parse_context_push() was called.
2109  * You must not call this function from the error callback -- the
2110  * @user_data is provided directly to the callback in that case.
2111  *
2112  * This function is not intended to be directly called by users
2113  * interested in invoking subparsers. Instead, it is intended to
2114  * be used by the subparsers themselves to implement a higher-level
2115  * interface.
2116  *
2117  * Returns: the user data passed to g_markup_parse_context_push()
2118  *
2119  * Since: 2.18
2120  */
2121 gpointer
2122 g_markup_parse_context_pop (GMarkupParseContext *context)
2123 {
2124   gpointer user_data;
2125
2126   if (!context->awaiting_pop)
2127     possibly_finish_subparser (context);
2128
2129   g_assert (context->awaiting_pop);
2130
2131   context->awaiting_pop = FALSE;
2132
2133   /* valgrind friendliness */
2134   user_data = context->held_user_data;
2135   context->held_user_data = NULL;
2136
2137   return user_data;
2138 }
2139
2140 static void
2141 append_escaped_text (GString     *str,
2142                      const gchar *text,
2143                      gssize       length)
2144 {
2145   const gchar *p;
2146   const gchar *end;
2147   gunichar c;
2148
2149   p = text;
2150   end = text + length;
2151
2152   while (p < end)
2153     {
2154       const gchar *next;
2155       next = g_utf8_next_char (p);
2156
2157       switch (*p)
2158         {
2159         case '&':
2160           g_string_append (str, "&amp;");
2161           break;
2162
2163         case '<':
2164           g_string_append (str, "&lt;");
2165           break;
2166
2167         case '>':
2168           g_string_append (str, "&gt;");
2169           break;
2170
2171         case '\'':
2172           g_string_append (str, "&apos;");
2173           break;
2174
2175         case '"':
2176           g_string_append (str, "&quot;");
2177           break;
2178
2179         default:
2180           c = g_utf8_get_char (p);
2181           if ((0x1 <= c && c <= 0x8) ||
2182               (0xb <= c && c  <= 0xc) ||
2183               (0xe <= c && c <= 0x1f) ||
2184               (0x7f <= c && c <= 0x84) ||
2185               (0x86 <= c && c <= 0x9f))
2186             g_string_append_printf (str, "&#x%x;", c);
2187           else
2188             g_string_append_len (str, p, next - p);
2189           break;
2190         }
2191
2192       p = next;
2193     }
2194 }
2195
2196 /**
2197  * g_markup_escape_text:
2198  * @text: some valid UTF-8 text
2199  * @length: length of @text in bytes, or -1 if the text is nul-terminated
2200  *
2201  * Escapes text so that the markup parser will parse it verbatim.
2202  * Less than, greater than, ampersand, etc. are replaced with the
2203  * corresponding entities. This function would typically be used
2204  * when writing out a file to be parsed with the markup parser.
2205  *
2206  * Note that this function doesn't protect whitespace and line endings
2207  * from being processed according to the XML rules for normalization
2208  * of line endings and attribute values.
2209  *
2210  * Note also that this function will produce character references in
2211  * the range of &#x1; ... &#x1f; for all control sequences
2212  * except for tabstop, newline and carriage return.  The character
2213  * references in this range are not valid XML 1.0, but they are
2214  * valid XML 1.1 and will be accepted by the GMarkup parser.
2215  *
2216  * Returns: a newly allocated string with the escaped text
2217  */
2218 gchar*
2219 g_markup_escape_text (const gchar *text,
2220                       gssize       length)
2221 {
2222   GString *str;
2223
2224   g_return_val_if_fail (text != NULL, NULL);
2225
2226   if (length < 0)
2227     length = strlen (text);
2228
2229   /* prealloc at least as long as original text */
2230   str = g_string_sized_new (length);
2231   append_escaped_text (str, text, length);
2232
2233   return g_string_free (str, FALSE);
2234 }
2235
/*
 * find_conversion_skip_position:
 * @cp: pointer into a printf-style format string
 *
 * Skips a positional argument reference of the form "<digits>$".
 *
 * Returns: pointer just past the '$' if @cp starts such a reference,
 *  otherwise @cp unchanged.
 */
static const char *
find_conversion_skip_position (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  /* Digits only count as a position when followed by '$'. */
  if (np != cp && *np == '$')
    return np + 1;

  return cp;
}

/*
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *     the returned conversion. On a %NULL return, returns the
 *     pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * A conversion that is cut short by the end of the string (for
 * example a lone "%" or "%l") is not reported as a conversion;
 * %NULL is returned and @after is set to the trailing NUL.
 *
 * Returns: pointer to the next conversion in @format,
 *  or %NULL, if none.
 */
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = find_conversion_skip_position (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      cp++;

      /* Test for positional argument.  */
      cp = find_conversion_skip_position (cp);
    }
  else
    {
      while (*cp >= '0' && *cp <= '9')
        cp++;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Consume the '*' itself; otherwise it would be mistaken
           * for the conversion character below.
           */
          cp++;

          /* Test for positional argument.  */
          cp = find_conversion_skip_position (cp);
        }
      else
        {
          while (*cp >= '0' && *cp <= '9')
            cp++;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* The string ended before a conversion character was seen: do not
   * step past the terminating NUL.
   */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Skip the conversion character.  */
  cp++;

  *after = cp;
  return start;
}
2361
2362 /**
2363  * g_markup_vprintf_escaped:
2364  * @format: printf() style format string
2365  * @args: variable argument list, similar to vprintf()
2366  *
2367  * Formats the data in @args according to @format, escaping
2368  * all string and character arguments in the fashion
2369  * of g_markup_escape_text(). See g_markup_printf_escaped().
2370  *
2371  * Returns: newly allocated result from formatting
2372  *  operation. Free with g_free().
2373  *
2374  * Since: 2.4
2375  */
2376 #pragma GCC diagnostic push
2377 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
2378
2379 gchar *
2380 g_markup_vprintf_escaped (const gchar *format,
2381                           va_list      args)
2382 {
2383   GString *format1;
2384   GString *format2;
2385   GString *result = NULL;
2386   gchar *output1 = NULL;
2387   gchar *output2 = NULL;
2388   const char *p, *op1, *op2;
2389   va_list args2;
2390
2391   /* The technique here, is that we make two format strings that
2392    * have the identical conversions in the identical order to the
2393    * original strings, but differ in the text in-between. We
2394    * then use the normal g_strdup_vprintf() to format the arguments
2395    * with the two new format strings. By comparing the results,
2396    * we can figure out what segments of the output come from
2397    * the original format string, and what from the arguments,
2398    * and thus know what portions of the string to escape.
2399    *
2400    * For instance, for:
2401    *
2402    *  g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2403    *
2404    * We form the two format strings "%sX%dX" and %sY%sY". The results
2405    * of formatting with those two strings are
2406    *
2407    * "%sX%dX" => "Susan & FredX5X"
2408    * "%sY%dY" => "Susan & FredY5Y"
2409    *
2410    * To find the span of the first argument, we find the first position
2411    * where the two arguments differ, which tells us that the first
2412    * argument formatted to "Susan & Fred". We then escape that
2413    * to "Susan & Fred" and join up with the intermediate portions
2414    * of the format string and the second argument to get
2415    * "Susan & Fred ate 5 apples".
2416    */
2417
2418   /* Create the two modified format strings
2419    */
2420   format1 = g_string_new (NULL);
2421   format2 = g_string_new (NULL);
2422   p = format;
2423   while (TRUE)
2424     {
2425       const char *after;
2426       const char *conv = find_conversion (p, &after);
2427       if (!conv)
2428         break;
2429
2430       g_string_append_len (format1, conv, after - conv);
2431       g_string_append_c (format1, 'X');
2432       g_string_append_len (format2, conv, after - conv);
2433       g_string_append_c (format2, 'Y');
2434
2435       p = after;
2436     }
2437
2438   /* Use them to format the arguments
2439    */
2440   G_VA_COPY (args2, args);
2441
2442   output1 = g_strdup_vprintf (format1->str, args);
2443
2444   if (!output1)
2445     {
2446       va_end (args2);
2447       goto cleanup;
2448     }
2449
2450   output2 = g_strdup_vprintf (format2->str, args2);
2451   va_end (args2);
2452   if (!output2)
2453     goto cleanup;
2454   result = g_string_new (NULL);
2455
2456   /* Iterate through the original format string again,
2457    * copying the non-conversion portions and the escaped
2458    * converted arguments to the output string.
2459    */
2460   op1 = output1;
2461   op2 = output2;
2462   p = format;
2463   while (TRUE)
2464     {
2465       const char *after;
2466       const char *output_start;
2467       const char *conv = find_conversion (p, &after);
2468       char *escaped;
2469
2470       if (!conv)        /* The end, after points to the trailing \0 */
2471         {
2472           g_string_append_len (result, p, after - p);
2473           break;
2474         }
2475
2476       g_string_append_len (result, p, conv - p);
2477       output_start = op1;
2478       while (*op1 == *op2)
2479         {
2480           op1++;
2481           op2++;
2482         }
2483
2484       escaped = g_markup_escape_text (output_start, op1 - output_start);
2485       g_string_append (result, escaped);
2486       g_free (escaped);
2487
2488       p = after;
2489       op1++;
2490       op2++;
2491     }
2492
2493  cleanup:
2494   g_string_free (format1, TRUE);
2495   g_string_free (format2, TRUE);
2496   g_free (output1);
2497   g_free (output2);
2498
2499   if (result)
2500     return g_string_free (result, FALSE);
2501   else
2502     return NULL;
2503 }
2504
2505 #pragma GCC diagnostic pop
2506
2507 /**
2508  * g_markup_printf_escaped:
2509  * @format: printf() style format string
2510  * @...: the arguments to insert in the format string
2511  *
2512  * Formats arguments according to @format, escaping
2513  * all string and character arguments in the fashion
2514  * of g_markup_escape_text(). This is useful when you
2515  * want to insert literal strings into XML-style markup
2516  * output, without having to worry that the strings
2517  * might themselves contain markup.
2518  *
2519  * |[<!-- language="C" -->
2520  * const char *store = "Fortnum & Mason";
2521  * const char *item = "Tea";
2522  * char *output;
2523  *
2524  * output = g_markup_printf_escaped ("<purchase>"
2525  *                                   "<store>%s</store>"
2526  *                                   "<item>%s</item>"
2527  *                                   "</purchase>",
2528  *                                   store, item);
2529  * ]|
2530  *
2531  * Returns: newly allocated result from formatting
2532  *    operation. Free with g_free().
2533  *
2534  * Since: 2.4
2535  */
2536 gchar *
2537 g_markup_printf_escaped (const gchar *format, ...)
2538 {
2539   char *result;
2540   va_list args;
2541
2542   va_start (args, format);
2543   result = g_markup_vprintf_escaped (format, args);
2544   va_end (args);
2545
2546   return result;
2547 }
2548
2549 static gboolean
2550 g_markup_parse_boolean (const char  *string,
2551                         gboolean    *value)
2552 {
2553   char const * const falses[] = { "false", "f", "no", "n", "0" };
2554   char const * const trues[] = { "true", "t", "yes", "y", "1" };
2555   int i;
2556
2557   for (i = 0; i < G_N_ELEMENTS (falses); i++)
2558     {
2559       if (g_ascii_strcasecmp (string, falses[i]) == 0)
2560         {
2561           if (value != NULL)
2562             *value = FALSE;
2563
2564           return TRUE;
2565         }
2566     }
2567
2568   for (i = 0; i < G_N_ELEMENTS (trues); i++)
2569     {
2570       if (g_ascii_strcasecmp (string, trues[i]) == 0)
2571         {
2572           if (value != NULL)
2573             *value = TRUE;
2574
2575           return TRUE;
2576         }
2577     }
2578
2579   return FALSE;
2580 }
2581
2582 /**
2583  * GMarkupCollectType:
2584  * @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes
2585  *     to collect
2586  * @G_MARKUP_COLLECT_STRING: collect the string pointer directly from
2587  *     the attribute_values[] array. Expects a parameter of type (const
2588  *     char **). If %G_MARKUP_COLLECT_OPTIONAL is specified and the
2589  *     attribute isn't present then the pointer will be set to %NULL
2590  * @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but
2591  *     expects a parameter of type (char **) and g_strdup()s the
2592  *     returned pointer. The pointer must be freed with g_free()
2593  * @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (gboolean *)
2594  *     and parses the attribute value as a boolean. Sets %FALSE if the
2595  *     attribute isn't present. Valid boolean values consist of
2596  *     (case-insensitive) "false", "f", "no", "n", "0" and "true", "t",
2597  *     "yes", "y", "1"
2598  * @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but
2599  *     in the case of a missing attribute a value is set that compares
 *     equal to neither %FALSE nor %TRUE. %G_MARKUP_COLLECT_OPTIONAL is
 *     implied
2602  * @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other fields.
2603  *     If present, allows the attribute not to appear. A default value
2604  *     is set depending on what value type is used
2605  *
2606  * A mixed enumerated type and flags field. You must specify one type
 * (string, strdup, boolean, tristate). Additionally, you may optionally
2608  * bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL.
2609  *
2610  * It is likely that this enum will be extended in the future to
2611  * support other types.
2612  */
2613
2614 /**
2615  * g_markup_collect_attributes:
2616  * @element_name: the current tag name
2617  * @attribute_names: the attribute names
2618  * @attribute_values: the attribute values
2619  * @error: a pointer to a #GError or %NULL
2620  * @first_type: the #GMarkupCollectType of the first attribute
2621  * @first_attr: the name of the first attribute
2622  * @...: a pointer to the storage location of the first attribute
2623  *     (or %NULL), followed by more types names and pointers, ending
2624  *     with %G_MARKUP_COLLECT_INVALID
2625  *
2626  * Collects the attributes of the element from the data passed to the
2627  * #GMarkupParser start_element function, dealing with common error
2628  * conditions and supporting boolean values.
2629  *
2630  * This utility function is not required to write a parser but can save
2631  * a lot of typing.
2632  *
2633  * The @element_name, @attribute_names, @attribute_values and @error
2634  * parameters passed to the start_element callback should be passed
2635  * unmodified to this function.
2636  *
2637  * Following these arguments is a list of "supported" attributes to collect.
2638  * It is an error to specify multiple attributes with the same name. If any
2639  * attribute not in the list appears in the @attribute_names array then an
2640  * unknown attribute error will result.
2641  *
2642  * The #GMarkupCollectType field allows specifying the type of collection
2643  * to perform and if a given attribute must appear or is optional.
2644  *
2645  * The attribute name is simply the name of the attribute to collect.
2646  *
2647  * The pointer should be of the appropriate type (see the descriptions
2648  * under #GMarkupCollectType) and may be %NULL in case a particular
2649  * attribute is to be allowed but ignored.
2650  *
2651  * This function deals with issuing errors for missing attributes
2652  * (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes
2653  * (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate
2654  * attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well
2655  * as parse errors for boolean-valued attributes (again of type
2656  * %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
2657  * will be returned and @error will be set as appropriate.
2658  *
2659  * Returns: %TRUE if successful
2660  *
2661  * Since: 2.16
2662  **/
2663 gboolean
2664 g_markup_collect_attributes (const gchar         *element_name,
2665                              const gchar        **attribute_names,
2666                              const gchar        **attribute_values,
2667                              GError             **error,
2668                              GMarkupCollectType   first_type,
2669                              const gchar         *first_attr,
2670                              ...)
2671 {
2672   GMarkupCollectType type;
2673   const gchar *attr;
2674   guint64 collected;
2675   int written;
2676   va_list ap;
2677   int i;
2678
2679   type = first_type;
2680   attr = first_attr;
2681   collected = 0;
2682   written = 0;
2683
2684   va_start (ap, first_attr);
2685   while (type != G_MARKUP_COLLECT_INVALID)
2686     {
2687       gboolean mandatory;
2688       const gchar *value;
2689
2690       mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
2691       type &= (G_MARKUP_COLLECT_OPTIONAL - 1);
2692
2693       /* tristate records a value != TRUE and != FALSE
2694        * for the case where the attribute is missing
2695        */
2696       if (type == G_MARKUP_COLLECT_TRISTATE)
2697         mandatory = FALSE;
2698
2699       for (i = 0; attribute_names[i]; i++)
2700         if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i)))
2701           if (!strcmp (attribute_names[i], attr))
2702             break;
2703
2704       /* ISO C99 only promises that the user can pass up to 127 arguments.
2705        * Subtracting the first 4 arguments plus the final NULL and dividing
2706        * by 3 arguments per collected attribute, we are left with a maximum
2707        * number of supported attributes of (127 - 5) / 3 = 40.
2708        *
2709        * In reality, nobody is ever going to call us with anywhere close to
2710        * 40 attributes to collect, so it is safe to assume that if i > 40
2711        * then the user has given some invalid or repeated arguments.  These
2712        * problems will be caught and reported at the end of the function.
2713        *
2714        * We know at this point that we have an error, but we don't know
2715        * what error it is, so just continue...
2716        */
2717       if (i < 40)
2718         collected |= (G_GUINT64_CONSTANT(1) << i);
2719
2720       value = attribute_values[i];
2721
2722       if (value == NULL && mandatory)
2723         {
2724           g_set_error (error, G_MARKUP_ERROR,
2725                        G_MARKUP_ERROR_MISSING_ATTRIBUTE,
2726                        "element '%s' requires attribute '%s'",
2727                        element_name, attr);
2728
2729           va_end (ap);
2730           goto failure;
2731         }
2732
2733       switch (type)
2734         {
2735         case G_MARKUP_COLLECT_STRING:
2736           {
2737             const char **str_ptr;
2738
2739             str_ptr = va_arg (ap, const char **);
2740
2741             if (str_ptr != NULL)
2742               *str_ptr = value;
2743           }
2744           break;
2745
2746         case G_MARKUP_COLLECT_STRDUP:
2747           {
2748             char **str_ptr;
2749
2750             str_ptr = va_arg (ap, char **);
2751
2752             if (str_ptr != NULL)
2753               *str_ptr = g_strdup (value);
2754           }
2755           break;
2756
2757         case G_MARKUP_COLLECT_BOOLEAN:
2758         case G_MARKUP_COLLECT_TRISTATE:
2759           if (value == NULL)
2760             {
2761               gboolean *bool_ptr;
2762
2763               bool_ptr = va_arg (ap, gboolean *);
2764
2765               if (bool_ptr != NULL)
2766                 {
2767                   if (type == G_MARKUP_COLLECT_TRISTATE)
2768                     /* constructivists rejoice!
2769                      * neither false nor true...
2770                      */
2771                     *bool_ptr = -1;
2772
2773                   else /* G_MARKUP_COLLECT_BOOLEAN */
2774                     *bool_ptr = FALSE;
2775                 }
2776             }
2777           else
2778             {
2779               if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
2780                 {
2781                   g_set_error (error, G_MARKUP_ERROR,
2782                                G_MARKUP_ERROR_INVALID_CONTENT,
2783                                "element '%s', attribute '%s', value '%s' "
2784                                "cannot be parsed as a boolean value",
2785                                element_name, attr, value);
2786
2787                   va_end (ap);
2788                   goto failure;
2789                 }
2790             }
2791
2792           break;
2793
2794         default:
2795           g_assert_not_reached ();
2796         }
2797
2798       type = va_arg (ap, GMarkupCollectType);
2799       attr = va_arg (ap, const char *);
2800       written++;
2801     }
2802   va_end (ap);
2803
2804   /* ensure we collected all the arguments */
2805   for (i = 0; attribute_names[i]; i++)
2806     if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
2807       {
2808         /* attribute not collected:  could be caused by two things.
2809          *
2810          * 1) it doesn't exist in our list of attributes
2811          * 2) it existed but was matched by a duplicate attribute earlier
2812          *
2813          * find out.
2814          */
2815         int j;
2816
2817         for (j = 0; j < i; j++)
2818           if (strcmp (attribute_names[i], attribute_names[j]) == 0)
2819             /* duplicate! */
2820             break;
2821
2822         /* j is now the first occurrence of attribute_names[i] */
2823         if (i == j)
2824           g_set_error (error, G_MARKUP_ERROR,
2825                        G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
2826                        "attribute '%s' invalid for element '%s'",
2827                        attribute_names[i], element_name);
2828         else
2829           g_set_error (error, G_MARKUP_ERROR,
2830                        G_MARKUP_ERROR_INVALID_CONTENT,
2831                        "attribute '%s' given multiple times for element '%s'",
2832                        attribute_names[i], element_name);
2833
2834         goto failure;
2835       }
2836
2837   return TRUE;
2838
2839 failure:
2840   /* replay the above to free allocations */
2841   type = first_type;
2842   attr = first_attr;
2843
2844   va_start (ap, first_attr);
2845   while (type != G_MARKUP_COLLECT_INVALID)
2846     {
2847       gpointer ptr;
2848
2849       ptr = va_arg (ap, gpointer);
2850
2851       if (ptr != NULL)
2852         {
2853           switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
2854             {
2855             case G_MARKUP_COLLECT_STRDUP:
2856               if (written)
2857                 g_free (*(char **) ptr);
2858
2859             case G_MARKUP_COLLECT_STRING:
2860               *(char **) ptr = NULL;
2861               break;
2862
2863             case G_MARKUP_COLLECT_BOOLEAN:
2864               *(gboolean *) ptr = FALSE;
2865               break;
2866
2867             case G_MARKUP_COLLECT_TRISTATE:
2868               *(gboolean *) ptr = -1;
2869               break;
2870             }
2871         }
2872
2873       type = va_arg (ap, GMarkupCollectType);
2874       attr = va_arg (ap, const char *);
2875     }
2876   va_end (ap);
2877
2878   return FALSE;
2879 }