/* gmarkup.c - Simple XML-like parser
 *
 *  Copyright 2000, 2003 Red Hat, Inc.
 *
 * GLib is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * GLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GLib; see the file COPYING.LIB.  If not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
34 g_markup_error_quark (void)
36 return g_quark_from_static_string ("g-markup-error-quark");
/* States of the markup parser's state machine.  The state is stored in
 * GMarkupParseContext.state and drives the big switch in
 * g_markup_parse_context_parse().
 */
typedef enum
{
  STATE_START,                       /* initial state of a new context */
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH,         /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,   /* value delimited by single quotes */
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,   /* value delimited by double quotes */
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR                        /* set by mark_error(); parsing cannot continue */
} GMarkupParseState;
60 struct _GMarkupParseContext
62 const GMarkupParser
*parser
;
64 GMarkupParseFlags flags
;
70 GDestroyNotify dnotify
;
72 /* A piece of character data or an element that
73 * hasn't "ended" yet so we haven't yet called
74 * the callback for it.
76 GString
*partial_chunk
;
78 GMarkupParseState state
;
85 const gchar
*current_text
;
86 gssize current_text_len
;
87 const gchar
*current_text_end
;
89 GString
*leftover_char_portion
;
91 /* used to save the start of the last interesting thingy */
96 guint document_empty
: 1;
102 * g_markup_parse_context_new:
103 * @parser: a #GMarkupParser
104 * @flags: one or more #GMarkupParseFlags
105 * @user_data: user data to pass to #GMarkupParser functions
106 * @user_data_dnotify: user data destroy notifier called when the parse context is freed
108 * Creates a new parse context. A parse context is used to parse
109 * marked-up documents. You can feed any number of documents into
110 * a context, as long as no errors occur; once an error occurs,
111 * the parse context can't continue to parse text (you have to free it
112 * and create a new parse context).
114 * Return value: a new #GMarkupParseContext
116 GMarkupParseContext
*
117 g_markup_parse_context_new (const GMarkupParser
*parser
,
118 GMarkupParseFlags flags
,
120 GDestroyNotify user_data_dnotify
)
122 GMarkupParseContext
*context
;
124 g_return_val_if_fail (parser
!= NULL
, NULL
);
126 context
= g_new (GMarkupParseContext
, 1);
128 context
->parser
= parser
;
129 context
->flags
= flags
;
130 context
->user_data
= user_data
;
131 context
->dnotify
= user_data_dnotify
;
133 context
->line_number
= 1;
134 context
->char_number
= 1;
136 context
->partial_chunk
= NULL
;
138 context
->state
= STATE_START
;
139 context
->tag_stack
= NULL
;
140 context
->attr_names
= NULL
;
141 context
->attr_values
= NULL
;
142 context
->cur_attr
= -1;
143 context
->alloc_attrs
= 0;
145 context
->current_text
= NULL
;
146 context
->current_text_len
= -1;
147 context
->current_text_end
= NULL
;
148 context
->leftover_char_portion
= NULL
;
150 context
->start
= NULL
;
151 context
->iter
= NULL
;
153 context
->document_empty
= TRUE
;
154 context
->parsing
= FALSE
;
156 context
->balance
= 0;
162 * g_markup_parse_context_free:
163 * @context: a #GMarkupParseContext
165 * Frees a #GMarkupParseContext. Can't be called from inside
166 * one of the #GMarkupParser functions.
170 g_markup_parse_context_free (GMarkupParseContext
*context
)
172 g_return_if_fail (context
!= NULL
);
173 g_return_if_fail (!context
->parsing
);
175 if (context
->dnotify
)
176 (* context
->dnotify
) (context
->user_data
);
178 g_strfreev (context
->attr_names
);
179 g_strfreev (context
->attr_values
);
181 g_slist_foreach (context
->tag_stack
, (GFunc
)g_free
, NULL
);
182 g_slist_free (context
->tag_stack
);
184 if (context
->partial_chunk
)
185 g_string_free (context
->partial_chunk
, TRUE
);
187 if (context
->leftover_char_portion
)
188 g_string_free (context
->leftover_char_portion
, TRUE
);
194 mark_error (GMarkupParseContext
*context
,
197 context
->state
= STATE_ERROR
;
199 if (context
->parser
->error
)
200 (*context
->parser
->error
) (context
, error
, context
->user_data
);
203 static void set_error (GMarkupParseContext
*context
,
207 ...) G_GNUC_PRINTF (4, 5);
210 set_error (GMarkupParseContext
*context
,
220 va_start (args
, format
);
221 s
= g_strdup_vprintf (format
, args
);
224 tmp_error
= g_error_new_literal (G_MARKUP_ERROR
, code
, s
);
227 g_prefix_error (&tmp_error
,
228 _("Error on line %d char %d: "),
229 context
->line_number
,
230 context
->char_number
);
232 mark_error (context
, tmp_error
);
234 g_propagate_error (error
, tmp_error
);
238 propagate_error (GMarkupParseContext
*context
,
242 if (context
->flags
& G_MARKUP_PREFIX_ERROR_POSITION
)
243 g_prefix_error (&src
,
244 _("Error on line %d char %d: "),
245 context
->line_number
,
246 context
->char_number
);
248 mark_error (context
, src
);
250 g_propagate_error (dest
, src
);
/* To make these faster, we first use the ascii-only tests, then check
 * for the usual non-alnum name-end chars, and only then call the
 * expensive unicode stuff. Nobody uses non-ascii in XML tag/attribute
 * names, so this is a reasonable hack that virtually always avoids
 * the guniprop call.
 */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
263 is_name_start_char (const gchar
*p
)
265 if (g_ascii_isalpha (*p
) ||
266 (!IS_COMMON_NAME_END_CHAR (*p
) &&
269 g_unichar_isalpha (g_utf8_get_char (p
)))))
276 is_name_char (const gchar
*p
)
278 if (g_ascii_isalnum (*p
) ||
279 (!IS_COMMON_NAME_END_CHAR (*p
) &&
284 g_unichar_isalpha (g_utf8_get_char (p
)))))
292 char_str (gunichar c
,
296 g_unichar_to_utf8 (c
, buf
);
301 utf8_str (const gchar
*utf8
,
304 char_str (g_utf8_get_char (utf8
), buf
);
309 set_unescape_error (GMarkupParseContext
*context
,
311 const gchar
*remaining_text
,
312 const gchar
*remaining_text_end
,
320 gint remaining_newlines
;
323 remaining_newlines
= 0;
325 while (p
!= remaining_text_end
)
328 ++remaining_newlines
;
332 va_start (args
, format
);
333 s
= g_strdup_vprintf (format
, args
);
336 tmp_error
= g_error_new (G_MARKUP_ERROR
,
338 _("Error on line %d: %s"),
339 context
->line_number
- remaining_newlines
,
344 mark_error (context
, tmp_error
);
346 g_propagate_error (error
, tmp_error
);
/* States of the entity/character-reference unescaper (unescape_text()) */
typedef enum
{
  USTATE_INSIDE_TEXT,          /* plain text, no reference in progress */
  USTATE_AFTER_AMPERSAND,      /* just consumed '&' */
  USTATE_INSIDE_ENTITY_NAME,   /* reading the name in "&name;" */
  USTATE_AFTER_CHARREF_HASH    /* just consumed "&#" */
} UnescapeState;
359 GMarkupParseContext
*context
;
363 const gchar
*text_end
;
364 const gchar
*entity_start
;
368 unescape_text_state_inside_text (UnescapeContext
*ucontext
,
373 gboolean normalize_attribute
;
375 if (ucontext
->context
->state
== STATE_INSIDE_ATTRIBUTE_VALUE_SQ
||
376 ucontext
->context
->state
== STATE_INSIDE_ATTRIBUTE_VALUE_DQ
)
377 normalize_attribute
= TRUE
;
379 normalize_attribute
= FALSE
;
383 while (p
!= ucontext
->text_end
)
389 else if (normalize_attribute
&& (*p
== '\t' || *p
== '\n'))
391 g_string_append_len (ucontext
->str
, start
, p
- start
);
392 g_string_append_c (ucontext
->str
, ' ');
393 p
= g_utf8_next_char (p
);
398 g_string_append_len (ucontext
->str
, start
, p
- start
);
399 g_string_append_c (ucontext
->str
, normalize_attribute
? ' ' : '\n');
400 p
= g_utf8_next_char (p
);
401 if (p
!= ucontext
->text_end
&& *p
== '\n')
402 p
= g_utf8_next_char (p
);
406 p
= g_utf8_next_char (p
);
410 g_string_append_len (ucontext
->str
, start
, p
- start
);
412 if (p
!= ucontext
->text_end
&& *p
== '&')
414 p
= g_utf8_next_char (p
);
415 ucontext
->state
= USTATE_AFTER_AMPERSAND
;
422 unescape_text_state_after_ampersand (UnescapeContext
*ucontext
,
426 ucontext
->entity_start
= NULL
;
430 p
= g_utf8_next_char (p
);
432 ucontext
->entity_start
= p
;
433 ucontext
->state
= USTATE_AFTER_CHARREF_HASH
;
435 else if (!is_name_start_char (p
))
439 set_unescape_error (ucontext
->context
, error
,
440 p
, ucontext
->text_end
,
441 G_MARKUP_ERROR_PARSE
,
442 _("Empty entity '&;' seen; valid "
443 "entities are: & " < > '"));
449 set_unescape_error (ucontext
->context
, error
,
450 p
, ucontext
->text_end
,
451 G_MARKUP_ERROR_PARSE
,
452 _("Character '%s' is not valid at "
453 "the start of an entity name; "
454 "the & character begins an entity; "
455 "if this ampersand isn't supposed "
456 "to be an entity, escape it as "
463 ucontext
->entity_start
= p
;
464 ucontext
->state
= USTATE_INSIDE_ENTITY_NAME
;
471 unescape_text_state_inside_entity_name (UnescapeContext
*ucontext
,
475 while (p
!= ucontext
->text_end
)
479 else if (!is_name_char (p
))
483 set_unescape_error (ucontext
->context
, error
,
484 p
, ucontext
->text_end
,
485 G_MARKUP_ERROR_PARSE
,
486 _("Character '%s' is not valid "
487 "inside an entity name"),
492 p
= g_utf8_next_char (p
);
495 if (ucontext
->context
->state
!= STATE_ERROR
)
497 if (p
!= ucontext
->text_end
)
499 gint len
= p
- ucontext
->entity_start
;
501 /* move to after semicolon */
502 p
= g_utf8_next_char (p
);
503 ucontext
->state
= USTATE_INSIDE_TEXT
;
505 if (strncmp (ucontext
->entity_start
, "lt", len
) == 0)
506 g_string_append_c (ucontext
->str
, '<');
507 else if (strncmp (ucontext
->entity_start
, "gt", len
) == 0)
508 g_string_append_c (ucontext
->str
, '>');
509 else if (strncmp (ucontext
->entity_start
, "amp", len
) == 0)
510 g_string_append_c (ucontext
->str
, '&');
511 else if (strncmp (ucontext
->entity_start
, "quot", len
) == 0)
512 g_string_append_c (ucontext
->str
, '"');
513 else if (strncmp (ucontext
->entity_start
, "apos", len
) == 0)
514 g_string_append_c (ucontext
->str
, '\'');
519 name
= g_strndup (ucontext
->entity_start
, len
);
520 set_unescape_error (ucontext
->context
, error
,
521 p
, ucontext
->text_end
,
522 G_MARKUP_ERROR_PARSE
,
523 _("Entity name '%s' is not known"),
530 set_unescape_error (ucontext
->context
, error
,
531 /* give line number of the & */
532 ucontext
->entity_start
, ucontext
->text_end
,
533 G_MARKUP_ERROR_PARSE
,
534 _("Entity did not end with a semicolon; "
535 "most likely you used an ampersand "
536 "character without intending to start "
537 "an entity - escape ampersand as &"));
546 unescape_text_state_after_charref_hash (UnescapeContext
*ucontext
,
550 gboolean is_hex
= FALSE
;
553 start
= ucontext
->entity_start
;
558 p
= g_utf8_next_char (p
);
562 while (p
!= ucontext
->text_end
&& *p
!= ';')
563 p
= g_utf8_next_char (p
);
565 if (p
!= ucontext
->text_end
)
567 g_assert (*p
== ';');
569 /* digit is between start and p */
578 l
= strtoul (start
, &end
, 16);
580 l
= strtoul (start
, &end
, 10);
582 if (end
!= p
|| errno
!= 0)
584 set_unescape_error (ucontext
->context
, error
,
585 start
, ucontext
->text_end
,
586 G_MARKUP_ERROR_PARSE
,
587 _("Failed to parse '%-.*s', which "
588 "should have been a digit "
589 "inside a character reference "
590 "(ê for example) - perhaps "
591 "the digit is too large"),
596 /* characters XML permits */
600 (l
>= 0x20 && l
<= 0xD7FF) ||
601 (l
>= 0xE000 && l
<= 0xFFFD) ||
602 (l
>= 0x10000 && l
<= 0x10FFFF))
605 g_string_append (ucontext
->str
, char_str (l
, buf
));
609 set_unescape_error (ucontext
->context
, error
,
610 start
, ucontext
->text_end
,
611 G_MARKUP_ERROR_PARSE
,
612 _("Character reference '%-.*s' does not "
613 "encode a permitted character"),
618 /* Move to next state */
619 p
= g_utf8_next_char (p
); /* past semicolon */
620 ucontext
->state
= USTATE_INSIDE_TEXT
;
624 set_unescape_error (ucontext
->context
, error
,
625 start
, ucontext
->text_end
,
626 G_MARKUP_ERROR_PARSE
,
627 _("Empty character reference; "
628 "should include a digit such as "
634 set_unescape_error (ucontext
->context
, error
,
635 start
, ucontext
->text_end
,
636 G_MARKUP_ERROR_PARSE
,
637 _("Character reference did not end with a "
639 "most likely you used an ampersand "
640 "character without intending to start "
641 "an entity - escape ampersand as &"));
648 unescape_text (GMarkupParseContext
*context
,
650 const gchar
*text_end
,
654 UnescapeContext ucontext
;
657 ucontext
.context
= context
;
658 ucontext
.text
= text
;
659 ucontext
.text_end
= text_end
;
660 ucontext
.entity_start
= NULL
;
662 ucontext
.str
= g_string_sized_new (text_end
- text
);
664 ucontext
.state
= USTATE_INSIDE_TEXT
;
667 while (p
!= text_end
&& context
->state
!= STATE_ERROR
)
669 g_assert (p
< text_end
);
671 switch (ucontext
.state
)
673 case USTATE_INSIDE_TEXT
:
675 p
= unescape_text_state_inside_text (&ucontext
,
681 case USTATE_AFTER_AMPERSAND
:
683 p
= unescape_text_state_after_ampersand (&ucontext
,
690 case USTATE_INSIDE_ENTITY_NAME
:
692 p
= unescape_text_state_inside_entity_name (&ucontext
,
698 case USTATE_AFTER_CHARREF_HASH
:
700 p
= unescape_text_state_after_charref_hash (&ucontext
,
707 g_assert_not_reached ();
712 if (context
->state
!= STATE_ERROR
)
714 switch (ucontext
.state
)
716 case USTATE_INSIDE_TEXT
:
718 case USTATE_AFTER_AMPERSAND
:
719 case USTATE_INSIDE_ENTITY_NAME
:
720 set_unescape_error (context
, error
,
722 G_MARKUP_ERROR_PARSE
,
723 _("Unfinished entity reference"));
725 case USTATE_AFTER_CHARREF_HASH
:
726 set_unescape_error (context
, error
,
728 G_MARKUP_ERROR_PARSE
,
729 _("Unfinished character reference"));
734 if (context
->state
== STATE_ERROR
)
736 g_string_free (ucontext
.str
, TRUE
);
742 *unescaped
= ucontext
.str
;
747 static inline gboolean
748 advance_char (GMarkupParseContext
*context
)
750 context
->iter
= g_utf8_next_char (context
->iter
);
751 context
->char_number
+= 1;
753 if (context
->iter
== context
->current_text_end
)
757 else if (*context
->iter
== '\n')
759 context
->line_number
+= 1;
760 context
->char_number
= 1;
766 static inline gboolean
769 return c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r';
773 skip_spaces (GMarkupParseContext
*context
)
777 if (!xml_isspace (*context
->iter
))
780 while (advance_char (context
));
784 advance_to_name_end (GMarkupParseContext
*context
)
788 if (!is_name_char (context
->iter
))
791 while (advance_char (context
));
795 add_to_partial (GMarkupParseContext
*context
,
796 const gchar
*text_start
,
797 const gchar
*text_end
)
799 if (context
->partial_chunk
== NULL
)
800 context
->partial_chunk
= g_string_sized_new (text_end
- text_start
);
802 if (text_start
!= text_end
)
803 g_string_append_len (context
->partial_chunk
, text_start
,
804 text_end
- text_start
);
806 /* Invariant here that partial_chunk exists */
810 truncate_partial (GMarkupParseContext
*context
)
812 if (context
->partial_chunk
!= NULL
)
814 context
->partial_chunk
= g_string_truncate (context
->partial_chunk
, 0);
819 current_element (GMarkupParseContext
*context
)
821 return context
->tag_stack
->data
;
825 current_attribute (GMarkupParseContext
*context
)
827 g_assert (context
->cur_attr
>= 0);
828 return context
->attr_names
[context
->cur_attr
];
832 find_current_text_end (GMarkupParseContext
*context
)
834 /* This function must be safe (non-segfaulting) on invalid UTF8.
835 * It assumes the string starts with a character start
837 const gchar
*end
= context
->current_text
+ context
->current_text_len
;
841 g_assert (context
->current_text_len
> 0);
843 p
= g_utf8_find_prev_char (context
->current_text
, end
);
845 g_assert (p
!= NULL
); /* since current_text was a char start */
847 /* p is now the start of the last character or character portion. */
849 next
= g_utf8_next_char (p
); /* this only touches *p, nothing beyond */
853 /* whole character */
854 context
->current_text_end
= end
;
859 context
->leftover_char_portion
= g_string_new_len (p
, end
- p
);
860 context
->current_text_len
-= (end
- p
);
861 context
->current_text_end
= p
;
867 add_attribute (GMarkupParseContext
*context
, char *name
)
869 if (context
->cur_attr
+ 2 >= context
->alloc_attrs
)
871 context
->alloc_attrs
+= 5; /* silly magic number */
872 context
->attr_names
= g_realloc (context
->attr_names
, sizeof(char*)*context
->alloc_attrs
);
873 context
->attr_values
= g_realloc (context
->attr_values
, sizeof(char*)*context
->alloc_attrs
);
876 context
->attr_names
[context
->cur_attr
] = name
;
877 context
->attr_values
[context
->cur_attr
] = NULL
;
878 context
->attr_names
[context
->cur_attr
+1] = NULL
;
879 context
->attr_values
[context
->cur_attr
+1] = NULL
;
883 * g_markup_parse_context_parse:
884 * @context: a #GMarkupParseContext
885 * @text: chunk of text to parse
886 * @text_len: length of @text in bytes
887 * @error: return location for a #GError
889 * Feed some data to the #GMarkupParseContext. The data need not
890 * be valid UTF-8; an error will be signaled if it's invalid.
891 * The data need not be an entire document; you can feed a document
892 * into the parser incrementally, via multiple calls to this function.
893 * Typically, as you receive data from a network connection or file,
894 * you feed each received chunk of data into this function, aborting
895 * the process if an error occurs. Once an error is reported, no further
896 * data may be fed to the #GMarkupParseContext; all errors are fatal.
898 * Return value: %FALSE if an error occurred, %TRUE on success
901 g_markup_parse_context_parse (GMarkupParseContext
*context
,
906 const gchar
*first_invalid
;
908 g_return_val_if_fail (context
!= NULL
, FALSE
);
909 g_return_val_if_fail (text
!= NULL
, FALSE
);
910 g_return_val_if_fail (context
->state
!= STATE_ERROR
, FALSE
);
911 g_return_val_if_fail (!context
->parsing
, FALSE
);
914 text_len
= strlen (text
);
919 context
->parsing
= TRUE
;
921 if (context
->leftover_char_portion
)
923 const gchar
*first_char
;
925 if ((*text
& 0xc0) != 0x80)
928 first_char
= g_utf8_find_next_char (text
, text
+ text_len
);
932 /* leftover_char_portion was completed. Parse it. */
933 GString
*portion
= context
->leftover_char_portion
;
935 g_string_append_len (context
->leftover_char_portion
,
936 text
, first_char
- text
);
938 /* hacks to allow recursion */
939 context
->parsing
= FALSE
;
940 context
->leftover_char_portion
= NULL
;
942 if (!g_markup_parse_context_parse (context
,
943 portion
->str
, portion
->len
,
946 g_assert (context
->state
== STATE_ERROR
);
949 g_string_free (portion
, TRUE
);
950 context
->parsing
= TRUE
;
952 /* Skip the fraction of char that was in this text */
953 text_len
-= (first_char
- text
);
958 /* another little chunk of the leftover char; geez
959 * someone is inefficient.
961 g_string_append_len (context
->leftover_char_portion
,
964 if (context
->leftover_char_portion
->len
> 7)
966 /* The leftover char portion is too big to be
971 G_MARKUP_ERROR_BAD_UTF8
,
972 _("Invalid UTF-8 encoded text - overlong sequence"));
979 context
->current_text
= text
;
980 context
->current_text_len
= text_len
;
981 context
->iter
= context
->current_text
;
982 context
->start
= context
->iter
;
984 /* Nothing left after finishing the leftover char, or nothing
985 * passed in to begin with.
987 if (context
->current_text_len
== 0)
990 /* find_current_text_end () assumes the string starts at
991 * a character start, so we need to validate at least
992 * that much. It doesn't assume any following bytes
995 if ((*context
->current_text
& 0xc0) == 0x80) /* not a char start */
999 G_MARKUP_ERROR_BAD_UTF8
,
1000 _("Invalid UTF-8 encoded text - not a start char"));
1004 /* Initialize context->current_text_end, possibly adjusting
1005 * current_text_len, and add any leftover char portion
1007 find_current_text_end (context
);
1009 /* Validate UTF8 (must be done after we find the end, since
1010 * we could have a trailing incomplete char)
1012 if (!g_utf8_validate (context
->current_text
,
1013 context
->current_text_len
,
1018 q
= p
= context
->current_text
;
1019 while (p
!= first_invalid
)
1025 context
->char_number
= 1;
1030 context
->line_number
+= newlines
;
1031 context
->char_number
+= g_utf8_strlen (q
, first_invalid
- q
);
1035 G_MARKUP_ERROR_BAD_UTF8
,
1036 _("Invalid UTF-8 encoded text - not valid '%s'"),
1037 g_strndup (context
->current_text
,
1038 context
->current_text_len
));
1042 while (context
->iter
!= context
->current_text_end
)
1044 switch (context
->state
)
1047 /* Possible next state: AFTER_OPEN_ANGLE */
1049 g_assert (context
->tag_stack
== NULL
);
1051 /* whitespace is ignored outside of any elements */
1052 skip_spaces (context
);
1054 if (context
->iter
!= context
->current_text_end
)
1056 if (*context
->iter
== '<')
1058 /* Move after the open angle */
1059 advance_char (context
);
1061 context
->state
= STATE_AFTER_OPEN_ANGLE
;
1063 /* this could start a passthrough */
1064 context
->start
= context
->iter
;
1066 /* document is now non-empty */
1067 context
->document_empty
= FALSE
;
1073 G_MARKUP_ERROR_PARSE
,
1074 _("Document must begin with an element (e.g. <book>)"));
1079 case STATE_AFTER_OPEN_ANGLE
:
1080 /* Possible next states: INSIDE_OPEN_TAG_NAME,
1081 * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1083 if (*context
->iter
== '?' ||
1084 *context
->iter
== '!')
1086 /* include < in the passthrough */
1087 const gchar
*openangle
= "<";
1088 add_to_partial (context
, openangle
, openangle
+ 1);
1089 context
->start
= context
->iter
;
1090 context
->balance
= 1;
1091 context
->state
= STATE_INSIDE_PASSTHROUGH
;
1093 else if (*context
->iter
== '/')
1096 advance_char (context
);
1098 context
->state
= STATE_AFTER_CLOSE_TAG_SLASH
;
1100 else if (is_name_start_char (context
->iter
))
1102 context
->state
= STATE_INSIDE_OPEN_TAG_NAME
;
1104 /* start of tag name */
1105 context
->start
= context
->iter
;
1113 G_MARKUP_ERROR_PARSE
,
1114 _("'%s' is not a valid character following "
1115 "a '<' character; it may not begin an "
1117 utf8_str (context
->iter
, buf
));
1121 /* The AFTER_CLOSE_ANGLE state is actually sort of
1122 * broken, because it doesn't correspond to a range
1123 * of characters in the input stream as the others do,
1124 * and thus makes things harder to conceptualize
1126 case STATE_AFTER_CLOSE_ANGLE
:
1127 /* Possible next states: INSIDE_TEXT, STATE_START */
1128 if (context
->tag_stack
== NULL
)
1130 context
->start
= NULL
;
1131 context
->state
= STATE_START
;
1135 context
->start
= context
->iter
;
1136 context
->state
= STATE_INSIDE_TEXT
;
1140 case STATE_AFTER_ELISION_SLASH
:
1141 /* Possible next state: AFTER_CLOSE_ANGLE */
1144 /* We need to pop the tag stack and call the end_element
1145 * function, since this is the close tag
1147 GError
*tmp_error
= NULL
;
1149 g_assert (context
->tag_stack
!= NULL
);
1152 if (context
->parser
->end_element
)
1153 (* context
->parser
->end_element
) (context
,
1154 context
->tag_stack
->data
,
1160 mark_error (context
, tmp_error
);
1161 g_propagate_error (error
, tmp_error
);
1165 if (*context
->iter
== '>')
1167 /* move after the close angle */
1168 advance_char (context
);
1169 context
->state
= STATE_AFTER_CLOSE_ANGLE
;
1177 G_MARKUP_ERROR_PARSE
,
1178 _("Odd character '%s', expected a '>' character "
1179 "to end the start tag of element '%s'"),
1180 utf8_str (context
->iter
, buf
),
1181 current_element (context
));
1185 g_free (context
->tag_stack
->data
);
1186 context
->tag_stack
= g_slist_delete_link (context
->tag_stack
,
1187 context
->tag_stack
);
1191 case STATE_INSIDE_OPEN_TAG_NAME
:
1192 /* Possible next states: BETWEEN_ATTRIBUTES */
1194 /* if there's a partial chunk then it's the first part of the
1195 * tag name. If there's a context->start then it's the start
1196 * of the tag name in current_text, the partial chunk goes
1197 * before that start though.
1199 advance_to_name_end (context
);
1201 if (context
->iter
== context
->current_text_end
)
1203 /* The name hasn't necessarily ended. Merge with
1204 * partial chunk, leave state unchanged.
1206 add_to_partial (context
, context
->start
, context
->iter
);
1210 /* The name has ended. Combine it with the partial chunk
1211 * if any; push it on the stack; enter next state.
1213 add_to_partial (context
, context
->start
, context
->iter
);
1214 context
->tag_stack
=
1215 g_slist_prepend (context
->tag_stack
,
1216 g_string_free (context
->partial_chunk
,
1219 context
->partial_chunk
= NULL
;
1221 context
->state
= STATE_BETWEEN_ATTRIBUTES
;
1222 context
->start
= NULL
;
1226 case STATE_INSIDE_ATTRIBUTE_NAME
:
1227 /* Possible next states: AFTER_ATTRIBUTE_NAME */
1229 advance_to_name_end (context
);
1230 add_to_partial (context
, context
->start
, context
->iter
);
1232 /* read the full name, if we enter the equals sign state
1233 * then add the attribute to the list (without the value),
1234 * otherwise store a partial chunk to be prepended later.
1236 if (context
->iter
!= context
->current_text_end
)
1237 context
->state
= STATE_AFTER_ATTRIBUTE_NAME
;
1240 case STATE_AFTER_ATTRIBUTE_NAME
:
1241 /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1243 skip_spaces (context
);
1245 if (context
->iter
!= context
->current_text_end
)
1247 /* The name has ended. Combine it with the partial chunk
1248 * if any; push it on the stack; enter next state.
1250 add_attribute (context
, g_string_free (context
->partial_chunk
, FALSE
));
1252 context
->partial_chunk
= NULL
;
1253 context
->start
= NULL
;
1255 if (*context
->iter
== '=')
1257 advance_char (context
);
1258 context
->state
= STATE_AFTER_ATTRIBUTE_EQUALS_SIGN
;
1266 G_MARKUP_ERROR_PARSE
,
1267 _("Odd character '%s', expected a '=' after "
1268 "attribute name '%s' of element '%s'"),
1269 utf8_str (context
->iter
, buf
),
1270 current_attribute (context
),
1271 current_element (context
));
1277 case STATE_BETWEEN_ATTRIBUTES
:
1278 /* Possible next states: AFTER_CLOSE_ANGLE,
1279 * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1281 skip_spaces (context
);
1283 if (context
->iter
!= context
->current_text_end
)
1285 if (*context
->iter
== '/')
1287 advance_char (context
);
1288 context
->state
= STATE_AFTER_ELISION_SLASH
;
1290 else if (*context
->iter
== '>')
1293 advance_char (context
);
1294 context
->state
= STATE_AFTER_CLOSE_ANGLE
;
1296 else if (is_name_start_char (context
->iter
))
1298 context
->state
= STATE_INSIDE_ATTRIBUTE_NAME
;
1299 /* start of attribute name */
1300 context
->start
= context
->iter
;
1308 G_MARKUP_ERROR_PARSE
,
1309 _("Odd character '%s', expected a '>' or '/' "
1310 "character to end the start tag of "
1311 "element '%s', or optionally an attribute; "
1312 "perhaps you used an invalid character in "
1313 "an attribute name"),
1314 utf8_str (context
->iter
, buf
),
1315 current_element (context
));
1318 /* If we're done with attributes, invoke
1319 * the start_element callback
1321 if (context
->state
== STATE_AFTER_ELISION_SLASH
||
1322 context
->state
== STATE_AFTER_CLOSE_ANGLE
)
1324 const gchar
*start_name
;
1325 /* Ugly, but the current code expects an empty array instead of NULL */
1326 const gchar
*empty
= NULL
;
1327 const gchar
**attr_names
= &empty
;
1328 const gchar
**attr_values
= &empty
;
1331 /* Call user callback for element start */
1332 start_name
= current_element (context
);
1334 if (context
->cur_attr
>= 0)
1336 attr_names
= (const gchar
**)context
->attr_names
;
1337 attr_values
= (const gchar
**)context
->attr_values
;
1341 if (context
->parser
->start_element
)
1342 (* context
->parser
->start_element
) (context
,
1344 (const gchar
**)attr_names
,
1345 (const gchar
**)attr_values
,
1349 /* Go ahead and free the attributes. */
1350 for (; context
->cur_attr
>= 0; context
->cur_attr
--)
1352 int pos
= context
->cur_attr
;
1353 g_free (context
->attr_names
[pos
]);
1354 g_free (context
->attr_values
[pos
]);
1355 context
->attr_names
[pos
] = context
->attr_values
[pos
] = NULL
;
1357 g_assert (context
->cur_attr
== -1);
1358 g_assert (context
->attr_names
== NULL
||
1359 context
->attr_names
[0] == NULL
);
1360 g_assert (context
->attr_values
== NULL
||
1361 context
->attr_values
[0] == NULL
);
1363 if (tmp_error
!= NULL
)
1364 propagate_error (context
, error
, tmp_error
);
1369 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN
:
1370 /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1372 skip_spaces (context
);
1374 if (context
->iter
!= context
->current_text_end
)
1376 if (*context
->iter
== '"')
1378 advance_char (context
);
1379 context
->state
= STATE_INSIDE_ATTRIBUTE_VALUE_DQ
;
1380 context
->start
= context
->iter
;
1382 else if (*context
->iter
== '\'')
1384 advance_char (context
);
1385 context
->state
= STATE_INSIDE_ATTRIBUTE_VALUE_SQ
;
1386 context
->start
= context
->iter
;
1394 G_MARKUP_ERROR_PARSE
,
1395 _("Odd character '%s', expected an open quote mark "
1396 "after the equals sign when giving value for "
1397 "attribute '%s' of element '%s'"),
1398 utf8_str (context
->iter
, buf
),
1399 current_attribute (context
),
1400 current_element (context
));
1405 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ
:
1406 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ
:
1407 /* Possible next states: BETWEEN_ATTRIBUTES */
1411 if (context
->state
== STATE_INSIDE_ATTRIBUTE_VALUE_SQ
)
1422 if (*context
->iter
== delim
)
1425 while (advance_char (context
));
1427 if (context
->iter
== context
->current_text_end
)
1429 /* The value hasn't necessarily ended. Merge with
1430 * partial chunk, leave state unchanged.
1432 add_to_partial (context
, context
->start
, context
->iter
);
1436 /* The value has ended at the quote mark. Combine it
1437 * with the partial chunk if any; set it for the current
1442 add_to_partial (context
, context
->start
, context
->iter
);
1444 g_assert (context
->cur_attr
>= 0);
1446 if (unescape_text (context
,
1447 context
->partial_chunk
->str
,
1448 context
->partial_chunk
->str
+
1449 context
->partial_chunk
->len
,
1453 /* success, advance past quote and set state. */
1454 context
->attr_values
[context
->cur_attr
] = g_string_free (unescaped
, FALSE
);
1455 advance_char (context
);
1456 context
->state
= STATE_BETWEEN_ATTRIBUTES
;
1457 context
->start
= NULL
;
1460 truncate_partial (context
);
1464 case STATE_INSIDE_TEXT
:
1465 /* Possible next states: AFTER_OPEN_ANGLE */
1468 if (*context
->iter
== '<')
1471 while (advance_char (context
));
1473 /* The text hasn't necessarily ended. Merge with
1474 * partial chunk, leave state unchanged.
1477 add_to_partial (context
, context
->start
, context
->iter
);
1479 if (context
->iter
!= context
->current_text_end
)
1481 GString
*unescaped
= NULL
;
1483 /* The text has ended at the open angle. Call the text
1487 if (unescape_text (context
,
1488 context
->partial_chunk
->str
,
1489 context
->partial_chunk
->str
+
1490 context
->partial_chunk
->len
,
1494 GError
*tmp_error
= NULL
;
1496 if (context
->parser
->text
)
1497 (*context
->parser
->text
) (context
,
1503 g_string_free (unescaped
, TRUE
);
1505 if (tmp_error
== NULL
)
1507 /* advance past open angle and set state. */
1508 advance_char (context
);
1509 context
->state
= STATE_AFTER_OPEN_ANGLE
;
1510 /* could begin a passthrough */
1511 context
->start
= context
->iter
;
1514 propagate_error (context
, error
, tmp_error
);
1517 truncate_partial (context
);
1521 case STATE_AFTER_CLOSE_TAG_SLASH
:
1522 /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1523 if (is_name_start_char (context
->iter
))
1525 context
->state
= STATE_INSIDE_CLOSE_TAG_NAME
;
1527 /* start of tag name */
1528 context
->start
= context
->iter
;
1536 G_MARKUP_ERROR_PARSE
,
1537 _("'%s' is not a valid character following "
1538 "the characters '</'; '%s' may not begin an "
1540 utf8_str (context
->iter
, buf
),
1541 utf8_str (context
->iter
, buf
));
1545 case STATE_INSIDE_CLOSE_TAG_NAME
:
1546 /* Possible next state: AFTER_CLOSE_TAG_NAME */
1547 advance_to_name_end (context
);
1548 add_to_partial (context
, context
->start
, context
->iter
);
1550 if (context
->iter
!= context
->current_text_end
)
1551 context
->state
= STATE_AFTER_CLOSE_TAG_NAME
;
1554 case STATE_AFTER_CLOSE_TAG_NAME
:
1555 /* Possible next state: AFTER_CLOSE_TAG_SLASH */
1557 skip_spaces (context
);
1559 if (context
->iter
!= context
->current_text_end
)
1563 /* The name has ended. Combine it with the partial chunk
1564 * if any; check that it matches stack top and pop
1565 * stack; invoke proper callback; enter next state.
1567 close_name
= g_string_free (context
->partial_chunk
, FALSE
);
1568 context
->partial_chunk
= NULL
;
1570 if (*context
->iter
!= '>')
1576 G_MARKUP_ERROR_PARSE
,
1577 _("'%s' is not a valid character following "
1578 "the close element name '%s'; the allowed "
1579 "character is '>'"),
1580 utf8_str (context
->iter
, buf
),
1583 else if (context
->tag_stack
== NULL
)
1587 G_MARKUP_ERROR_PARSE
,
1588 _("Element '%s' was closed, no element "
1589 "is currently open"),
1592 else if (strcmp (close_name
, current_element (context
)) != 0)
1596 G_MARKUP_ERROR_PARSE
,
1597 _("Element '%s' was closed, but the currently "
1598 "open element is '%s'"),
1600 current_element (context
));
1605 advance_char (context
);
1606 context
->state
= STATE_AFTER_CLOSE_ANGLE
;
1607 context
->start
= NULL
;
1609 /* call the end_element callback */
1611 if (context
->parser
->end_element
)
1612 (* context
->parser
->end_element
) (context
,
1618 /* Pop the tag stack */
1619 g_free (context
->tag_stack
->data
);
1620 context
->tag_stack
= g_slist_delete_link (context
->tag_stack
,
1621 context
->tag_stack
);
1624 propagate_error (context
, error
, tmp_error
);
1627 g_free (close_name
);
1631 case STATE_INSIDE_PASSTHROUGH
:
1632 /* Possible next state: AFTER_CLOSE_ANGLE */
1635 if (*context
->iter
== '<')
1637 if (*context
->iter
== '>')
1643 add_to_partial (context
, context
->start
, context
->iter
);
1644 context
->start
= context
->iter
;
1646 str
= context
->partial_chunk
->str
;
1647 len
= context
->partial_chunk
->len
;
1649 if (str
[1] == '?' && str
[len
- 1] == '?')
1651 if (strncmp (str
, "<!--", 4) == 0 &&
1652 strcmp (str
+ len
- 2, "--") == 0)
1654 if (strncmp (str
, "<![CDATA[", 9) == 0 &&
1655 strcmp (str
+ len
- 2, "]]") == 0)
1657 if (strncmp (str
, "<!DOCTYPE", 9) == 0 &&
1658 context
->balance
== 0)
1662 while (advance_char (context
));
1664 if (context
->iter
== context
->current_text_end
)
1666 /* The passthrough hasn't necessarily ended. Merge with
1667 * partial chunk, leave state unchanged.
1669 add_to_partial (context
, context
->start
, context
->iter
);
1673 /* The passthrough has ended at the close angle. Combine
1674 * it with the partial chunk if any. Call the passthrough
1675 * callback. Note that the open/close angles are
1676 * included in the text of the passthrough.
1678 GError
*tmp_error
= NULL
;
1680 advance_char (context
); /* advance past close angle */
1681 add_to_partial (context
, context
->start
, context
->iter
);
1683 if (context
->flags
& G_MARKUP_TREAT_CDATA_AS_TEXT
&&
1684 strncmp (context
->partial_chunk
->str
, "<![CDATA[", 9) == 0)
1686 if (context
->parser
->text
)
1687 (*context
->parser
->text
) (context
,
1688 context
->partial_chunk
->str
+ 9,
1689 context
->partial_chunk
->len
- 12,
1693 else if (context
->parser
->passthrough
)
1694 (*context
->parser
->passthrough
) (context
,
1695 context
->partial_chunk
->str
,
1696 context
->partial_chunk
->len
,
1700 truncate_partial (context
);
1702 if (tmp_error
== NULL
)
1704 context
->state
= STATE_AFTER_CLOSE_ANGLE
;
1705 context
->start
= context
->iter
; /* could begin text */
1708 propagate_error (context
, error
, tmp_error
);
1717 g_assert_not_reached ();
1723 context
->parsing
= FALSE
;
1725 return context
->state
!= STATE_ERROR
;
1729 * g_markup_parse_context_end_parse:
1730 * @context: a #GMarkupParseContext
1731 * @error: return location for a #GError
1733 * Signals to the #GMarkupParseContext that all data has been
1734 * fed into the parse context with g_markup_parse_context_parse().
1735 * This function reports an error if the document isn't complete,
1736 * for example if elements are still open.
1738 * Return value: %TRUE on success, %FALSE if an error was set
1741 g_markup_parse_context_end_parse (GMarkupParseContext
*context
,
1744 g_return_val_if_fail (context
!= NULL
, FALSE
);
1745 g_return_val_if_fail (!context
->parsing
, FALSE
);
1746 g_return_val_if_fail (context
->state
!= STATE_ERROR
, FALSE
);
1748 if (context
->partial_chunk
!= NULL
)
1750 g_string_free (context
->partial_chunk
, TRUE
);
1751 context
->partial_chunk
= NULL
;
1754 if (context
->document_empty
)
1756 set_error (context
, error
, G_MARKUP_ERROR_EMPTY
,
1757 _("Document was empty or contained only whitespace"));
1761 context
->parsing
= TRUE
;
1763 switch (context
->state
)
1769 case STATE_AFTER_OPEN_ANGLE
:
1770 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1771 _("Document ended unexpectedly just after an open angle bracket '<'"));
1774 case STATE_AFTER_CLOSE_ANGLE
:
1775 if (context
->tag_stack
!= NULL
)
1777 /* Error message the same as for INSIDE_TEXT */
1778 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1779 _("Document ended unexpectedly with elements still open - "
1780 "'%s' was the last element opened"),
1781 current_element (context
));
1785 case STATE_AFTER_ELISION_SLASH
:
1786 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1787 _("Document ended unexpectedly, expected to see a close angle "
1788 "bracket ending the tag <%s/>"), current_element (context
));
1791 case STATE_INSIDE_OPEN_TAG_NAME
:
1792 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1793 _("Document ended unexpectedly inside an element name"));
1796 case STATE_INSIDE_ATTRIBUTE_NAME
:
1797 case STATE_AFTER_ATTRIBUTE_NAME
:
1798 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1799 _("Document ended unexpectedly inside an attribute name"));
1802 case STATE_BETWEEN_ATTRIBUTES
:
1803 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1804 _("Document ended unexpectedly inside an element-opening "
1808 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN
:
1809 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1810 _("Document ended unexpectedly after the equals sign "
1811 "following an attribute name; no attribute value"));
1814 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ
:
1815 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ
:
1816 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1817 _("Document ended unexpectedly while inside an attribute "
1821 case STATE_INSIDE_TEXT
:
1822 g_assert (context
->tag_stack
!= NULL
);
1823 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1824 _("Document ended unexpectedly with elements still open - "
1825 "'%s' was the last element opened"),
1826 current_element (context
));
1829 case STATE_AFTER_CLOSE_TAG_SLASH
:
1830 case STATE_INSIDE_CLOSE_TAG_NAME
:
1831 case STATE_AFTER_CLOSE_TAG_NAME
:
1832 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1833 _("Document ended unexpectedly inside the close tag for "
1834 "element '%s'"), current_element (context
));
1837 case STATE_INSIDE_PASSTHROUGH
:
1838 set_error (context
, error
, G_MARKUP_ERROR_PARSE
,
1839 _("Document ended unexpectedly inside a comment or "
1840 "processing instruction"));
1845 g_assert_not_reached ();
1849 context
->parsing
= FALSE
;
1851 return context
->state
!= STATE_ERROR
;
1855 * g_markup_parse_context_get_element:
1856 * @context: a #GMarkupParseContext
1857 * @returns: the name of the currently open element, or %NULL
1859 * Retrieves the name of the currently open element.
1861 * If called from the start_element or end_element handlers this will
1862 * give the element_name as passed to those functions. For the parent
1863 * elements, see g_markup_parse_context_get_element_stack().
1867 G_CONST_RETURN gchar
*
1868 g_markup_parse_context_get_element (GMarkupParseContext
*context
)
1870 g_return_val_if_fail (context
!= NULL
, NULL
);
1872 if (context
->tag_stack
== NULL
)
1875 return current_element (context
);
1879 * g_markup_parse_context_get_element_stack:
1880 * @context: a #GMarkupParseContext
1882 * Retrieves the element stack from the internal state of the parser.
1883 * The returned #GSList is a list of strings where the first item is
1884 * the currently open tag (as would be returned by
1885 * g_markup_parse_context_get_element()) and the next item is its
1888 * This function is intended to be used in the start_element and
1889 * end_element handlers where g_markup_parse_context_get_element()
1890 * would merely return the name of the element that is being
1893 * Returns: the element stack, which must not be modified
1897 G_CONST_RETURN GSList
*
1898 g_markup_parse_context_get_element_stack (GMarkupParseContext
*context
)
1900 g_return_val_if_fail (context
!= NULL
, NULL
);
1902 return context
->tag_stack
;
1906 * g_markup_parse_context_get_position:
1907 * @context: a #GMarkupParseContext
1908 * @line_number: return location for a line number, or %NULL
1909 * @char_number: return location for a char-on-line number, or %NULL
1911 * Retrieves the current line number and the number of the character on
1912 * that line. Intended for use in error messages; there are no strict
1913 * semantics for what constitutes the "current" line number other than
1914 * "the best number we could come up with for error messages."
1918 g_markup_parse_context_get_position (GMarkupParseContext
*context
,
1922 g_return_if_fail (context
!= NULL
);
1925 *line_number
= context
->line_number
;
1928 *char_number
= context
->char_number
;
1932 append_escaped_text (GString
*str
,
1941 end
= text
+ length
;
1946 next
= g_utf8_next_char (p
);
1951 g_string_append (str
, "&");
1955 g_string_append (str
, "<");
1959 g_string_append (str
, ">");
1963 g_string_append (str
, "'");
1967 g_string_append (str
, """);
1971 c
= g_utf8_get_char (p
);
1972 if ((0x1 <= c
&& c
<= 0x8) ||
1973 (0xb <= c
&& c
<= 0xc) ||
1974 (0xe <= c
&& c
<= 0x1f) ||
1975 (0x7f <= c
&& c
<= 0x84) ||
1976 (0x86 <= c
&& c
<= 0x9f))
1977 g_string_append_printf (str
, "&#x%x;", c
);
1979 g_string_append_len (str
, p
, next
- p
);
1988 * g_markup_escape_text:
1989 * @text: some valid UTF-8 text
1990 * @length: length of @text in bytes, or -1 if the text is nul-terminated
1992 * Escapes text so that the markup parser will parse it verbatim.
1993 * Less than, greater than, ampersand, etc. are replaced with the
1994 * corresponding entities. This function would typically be used
1995 * when writing out a file to be parsed with the markup parser.
1997 * Note that this function doesn't protect whitespace and line endings
1998 * from being processed according to the XML rules for normalization
1999 * of line endings and attribute values.
2001 * Return value: a newly allocated string with the escaped text
2004 g_markup_escape_text (const gchar
*text
,
2009 g_return_val_if_fail (text
!= NULL
, NULL
);
2012 length
= strlen (text
);
2014 /* prealloc at least as long as original text */
2015 str
= g_string_sized_new (length
);
2016 append_escaped_text (str
, text
, length
);
2018 return g_string_free (str
, FALSE
);
/* Returns the position just past an "N$" positional-argument marker
 * beginning at @cp, or @cp itself when no such marker is present.
 */
static const char *
skip_positional_marker (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  return (np != cp && *np == '$') ? np + 1 : cp;
}

/*
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *   the returned conversion. On a %NULL return, returns the
 *   pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * Return value: pointer to the next conversion in @format,
 *  or %NULL, if none.
 */
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  /* A lone '%' at the very end of the string is not a conversion. */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = skip_positional_marker (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      /* Step over the '*', then over an optional positional marker. */
      cp = skip_positional_marker (cp + 1);
    }
  else
    {
      for (; *cp >= '0' && *cp <= '9'; cp++)
        ;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* The '*' itself has to be consumed here; otherwise it is
           * mistaken for the conversion character and "%.*s" is cut
           * off as "%.*".
           */
          cp = skip_positional_marker (cp + 1);
        }
      else
        {
          for (; *cp >= '0' && *cp <= '9'; cp++)
            ;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* Skip the conversion character - unless the format string was cut
   * short, in which case @after must stay on the trailing NUL so that
   * callers never scan past the end of the string.
   */
  if (*cp != '\0')
    cp++;

  *after = cp;
  return start;
}
2148 * g_markup_vprintf_escaped:
2149 * @format: printf() style format string
2150 * @args: variable argument list, similar to vprintf()
2152 * Formats the data in @args according to @format, escaping
2153 * all string and character arguments in the fashion
2154 * of g_markup_escape_text(). See g_markup_printf_escaped().
2156 * Return value: newly allocated result from formatting
2157 * operation. Free with g_free().
2162 g_markup_vprintf_escaped (const char *format
,
2167 GString
*result
= NULL
;
2168 gchar
*output1
= NULL
;
2169 gchar
*output2
= NULL
;
2170 const char *p
, *op1
, *op2
;
2173 /* The technique here, is that we make two format strings that
2174 * have the identical conversions in the identical order to the
2175 * original strings, but differ in the text in-between. We
2176 * then use the normal g_strdup_vprintf() to format the arguments
2177 * with the two new format strings. By comparing the results,
2178 * we can figure out what segments of the output come from
2179 * the the original format string, and what from the arguments,
2180 * and thus know what portions of the string to escape.
2182 * For instance, for:
2184 * g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2186 * We form the two format strings "%sX%dX" and %sY%sY". The results
2187 * of formatting with those two strings are
2189 * "%sX%dX" => "Susan & FredX5X"
2190 * "%sY%dY" => "Susan & FredY5Y"
2192 * To find the span of the first argument, we find the first position
2193 * where the two arguments differ, which tells us that the first
2194 * argument formatted to "Susan & Fred". We then escape that
2195 * to "Susan & Fred" and join up with the intermediate portions
2196 * of the format string and the second argument to get
2197 * "Susan & Fred ate 5 apples".
2200 /* Create the two modified format strings
2202 format1
= g_string_new (NULL
);
2203 format2
= g_string_new (NULL
);
2208 const char *conv
= find_conversion (p
, &after
);
2212 g_string_append_len (format1
, conv
, after
- conv
);
2213 g_string_append_c (format1
, 'X');
2214 g_string_append_len (format2
, conv
, after
- conv
);
2215 g_string_append_c (format2
, 'Y');
2220 /* Use them to format the arguments
2222 G_VA_COPY (args2
, args
);
2224 output1
= g_strdup_vprintf (format1
->str
, args
);
2231 output2
= g_strdup_vprintf (format2
->str
, args2
);
2236 result
= g_string_new (NULL
);
2238 /* Iterate through the original format string again,
2239 * copying the non-conversion portions and the escaped
2240 * converted arguments to the output string.
2248 const char *output_start
;
2249 const char *conv
= find_conversion (p
, &after
);
2252 if (!conv
) /* The end, after points to the trailing \0 */
2254 g_string_append_len (result
, p
, after
- p
);
2258 g_string_append_len (result
, p
, conv
- p
);
2260 while (*op1
== *op2
)
2266 escaped
= g_markup_escape_text (output_start
, op1
- output_start
);
2267 g_string_append (result
, escaped
);
2276 g_string_free (format1
, TRUE
);
2277 g_string_free (format2
, TRUE
);
2282 return g_string_free (result
, FALSE
);
2288 * g_markup_printf_escaped:
2289 * @format: printf() style format string
2290 * @Varargs: the arguments to insert in the format string
2292 * Formats arguments according to @format, escaping
2293 * all string and character arguments in the fashion
2294 * of g_markup_escape_text(). This is useful when you
2295 * want to insert literal strings into XML-style markup
2296 * output, without having to worry that the strings
2297 * might themselves contain markup.
2300 * const char *store = "Fortnum & Mason";
2301 * const char *item = "Tea";
2304 * output = g_markup_printf_escaped ("<purchase>"
2305 * "<store>%s</store>"
2306 * "<item>%s</item>"
2307 * "</purchase>",
2311 * Return value: newly allocated result from formatting
2312 * operation. Free with g_free().
2317 g_markup_printf_escaped (const char *format
, ...)
2322 va_start (args
, format
);
2323 result
= g_markup_vprintf_escaped (format
, args
);
2330 g_markup_parse_boolean (const char *string
,
2333 char const * const falses
[] = { "false", "f", "no", "n", "0" };
2334 char const * const trues
[] = { "true", "t", "yes", "y", "1" };
2337 for (i
= 0; i
< G_N_ELEMENTS (falses
); i
++)
2339 if (g_ascii_strcasecmp (string
, falses
[i
]) == 0)
2348 for (i
= 0; i
< G_N_ELEMENTS (trues
); i
++)
2350 if (g_ascii_strcasecmp (string
, trues
[i
]) == 0)
/**
 * GMarkupCollectType:
 * @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes
 *                            to collect.
 * @G_MARKUP_COLLECT_STRING: collect the string pointer directly from
 *                           the attribute_values[] array. Expects a
 *                           parameter of type (const char **). If
 *                           %G_MARKUP_COLLECT_OPTIONAL is specified
 *                           and the attribute isn't present then the
 *                           pointer will be set to %NULL.
 * @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but
 *                           expects a parameter of type (char **) and
 *                           g_strdup()s the returned pointer. The
 *                           pointer must be freed with g_free().
 * @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (gboolean *)
 *                            and parses the attribute value as a
 *                            boolean. Sets %FALSE if the attribute
 *                            isn't present. Valid boolean values
 *                            consist of (case insensitive) "false",
 *                            "f", "no", "n", "0" and "true", "t",
 *                            "yes", "y", "1".
 * @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but
 *                             in the case of a missing attribute a
 *                             value is set that compares equal to
 *                             neither %FALSE nor %TRUE.
 *                             %G_MARKUP_COLLECT_OPTIONAL is implied.
 * @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other
 *                             fields. If present, allows the
 *                             attribute not to appear. A default
 *                             value is set depending on what value
 *                             type is used.
 *
 * A mixed enumerated type and flags field. You must specify one type
 * (string, strdup, boolean, tristate). Additionally, you may
 * optionally bitwise OR the type with the flag
 * %G_MARKUP_COLLECT_OPTIONAL.
 *
 * It is likely that this enum will be extended in the future to
 * support other types.
 **/
2404 * g_markup_collect_attributes:
2405 * @element_name: the current tag name
2406 * @attribute_names: the attribute names
2407 * @attribute_values: the attribute values
2408 * @error: a pointer to a #GError or %NULL
2409 * @first_type: the #GMarkupCollectType of the
2411 * @first_attr: the name of the first attribute
2412 * @...: a pointer to the storage location of the
2413 * first attribute (or %NULL), followed by
2414 * more types names and pointers, ending
2415 * with %G_MARKUP_COLLECT_INVALID.
2417 * Collects the attributes of the element from the
2418 * data passed to the #GMarkupParser start_element
2419 * function, dealing with common error conditions
2420 * and supporting boolean values.
2422 * This utility function is not required to write
2423 * a parser but can save a lot of typing.
2425 * The @element_name, @attribute_names,
2426 * @attribute_values and @error parameters passed
2427 * to the start_element callback should be passed
2428 * unmodified to this function.
2430 * Following these arguments is a list of
2431 * "supported" attributes to collect. It is an
2432 * error to specify multiple attributes with the
2433 * same name. If any attribute not in the list
2434 * appears in the @attribute_names array then an
2435 * unknown attribute error will result.
2437 * The #GMarkupCollectType field allows specifying
2438 * the type of collection to perform and if a
2439 * given attribute must appear or is optional.
2441 * The attribute name is simply the name of the
2442 * attribute to collect.
2444 * The pointer should be of the appropriate type
2445 * (see the descriptions under
2446 * #GMarkupCollectType) and may be %NULL in case a
2447 * particular attribute is to be allowed but
2450 * This function deals with issuing errors for missing attributes
2451 * (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes
2452 * (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate
2453 * attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well
2454 * as parse errors for boolean-valued attributes (again of type
2455 * %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
2456 * will be returned and @error will be set as appropriate.
2458 * Return value: %TRUE if successful
2463 g_markup_collect_attributes (const gchar
*element_name
,
2464 const gchar
**attribute_names
,
2465 const gchar
**attribute_values
,
2467 GMarkupCollectType first_type
,
2468 const gchar
*first_attr
,
2471 GMarkupCollectType type
;
2483 va_start (ap
, first_attr
);
2484 while (type
!= G_MARKUP_COLLECT_INVALID
)
2489 mandatory
= !(type
& G_MARKUP_COLLECT_OPTIONAL
);
2490 type
&= (G_MARKUP_COLLECT_OPTIONAL
- 1);
2492 /* tristate records a value != TRUE and != FALSE
2493 * for the case where the attribute is missing
2495 if (type
== G_MARKUP_COLLECT_TRISTATE
)
2498 for (i
= 0; attribute_names
[i
]; i
++)
2499 if (i
>= 40 || !(collected
& (G_GUINT64_CONSTANT(1) << i
)))
2500 if (!strcmp (attribute_names
[i
], attr
))
2503 /* ISO C99 only promises that the user can pass up to 127 arguments.
2504 * Subtracting the first 4 arguments plus the final NULL and dividing
2505 * by 3 arguments per collected attribute, we are left with a maximum
2506 * number of supported attributes of (127 - 5) / 3 = 40.
2508 * In reality, nobody is ever going to call us with anywhere close to
2509 * 40 attributes to collect, so it is safe to assume that if i > 40
2510 * then the user has given some invalid or repeated arguments. These
2511 * problems will be caught and reported at the end of the function.
2513 * We know at this point that we have an error, but we don't know
2514 * what error it is, so just continue...
2517 collected
|= (G_GUINT64_CONSTANT(1) << i
);
2519 value
= attribute_values
[i
];
2521 if (value
== NULL
&& mandatory
)
2523 g_set_error (error
, G_MARKUP_ERROR
,
2524 G_MARKUP_ERROR_MISSING_ATTRIBUTE
,
2525 "element '%s' requires attribute '%s'",
2526 element_name
, attr
);
2534 case G_MARKUP_COLLECT_STRING
:
2536 const char **str_ptr
;
2538 str_ptr
= va_arg (ap
, const char **);
2540 if (str_ptr
!= NULL
)
2545 case G_MARKUP_COLLECT_STRDUP
:
2549 str_ptr
= va_arg (ap
, char **);
2551 if (str_ptr
!= NULL
)
2552 *str_ptr
= g_strdup (value
);
2556 case G_MARKUP_COLLECT_BOOLEAN
:
2557 case G_MARKUP_COLLECT_TRISTATE
:
2562 bool_ptr
= va_arg (ap
, gboolean
*);
2564 if (bool_ptr
!= NULL
)
2566 if (type
== G_MARKUP_COLLECT_TRISTATE
)
2567 /* constructivists rejoice!
2568 * neither false nor true...
2572 else /* G_MARKUP_COLLECT_BOOLEAN */
2578 if (!g_markup_parse_boolean (value
, va_arg (ap
, gboolean
*)))
2580 g_set_error (error
, G_MARKUP_ERROR
,
2581 G_MARKUP_ERROR_INVALID_CONTENT
,
2582 "element '%s', attribute '%s', value '%s' "
2583 "cannot be parsed as a boolean value",
2584 element_name
, attr
, value
);
2594 g_assert_not_reached ();
2597 type
= va_arg (ap
, GMarkupCollectType
);
2598 attr
= va_arg (ap
, const char *);
2603 /* ensure we collected all the arguments */
2604 for (i
= 0; attribute_names
[i
]; i
++)
2605 if ((collected
& (G_GUINT64_CONSTANT(1) << i
)) == 0)
2607 /* attribute not collected: could be caused by two things.
2609 * 1) it doesn't exist in our list of attributes
2610 * 2) it existed but was matched by a duplicate attribute earlier
2616 for (j
= 0; j
< i
; j
++)
2617 if (strcmp (attribute_names
[i
], attribute_names
[j
]) == 0)
2621 /* j is now the first occurance of attribute_names[i] */
2623 g_set_error (error
, G_MARKUP_ERROR
,
2624 G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE
,
2625 "attribute '%s' invalid for element '%s'",
2626 attribute_names
[i
], element_name
);
2628 g_set_error (error
, G_MARKUP_ERROR
,
2629 G_MARKUP_ERROR_INVALID_CONTENT
,
2630 "attribute '%s' given multiple times for element '%s'",
2631 attribute_names
[i
], element_name
);
2639 /* replay the above to free allocations */
2643 va_start (ap
, first_attr
);
2644 while (type
!= G_MARKUP_COLLECT_INVALID
)
2648 ptr
= va_arg (ap
, gpointer
);
2653 switch (type
& (G_MARKUP_COLLECT_OPTIONAL
- 1))
2655 case G_MARKUP_COLLECT_STRDUP
:
2657 g_free (*(char **) ptr
);
2659 case G_MARKUP_COLLECT_STRING
:
2660 *(char **) ptr
= NULL
;
2663 case G_MARKUP_COLLECT_BOOLEAN
:
2664 *(gboolean
*) ptr
= FALSE
;
2667 case G_MARKUP_COLLECT_TRISTATE
:
2668 *(gboolean
*) ptr
= -1;
2672 type
= va_arg (ap
, GMarkupCollectType
);
2673 attr
= va_arg (ap
, const char *);
2683 #define __G_MARKUP_C__
2684 #include "galiasdef.c"