1 #ifndef EL_DOM_SGML_SCANNER_H
2 #define EL_DOM_SGML_SCANNER_H
4 #include "dom/scanner.h"
9 /* Char tokens range from 1 to 255 and have their char value as type */
10 /* meaning non char tokens have values from 256 and up. */
12 /* Low level string tokens: */
14 SGML_TOKEN_IDENT
= 256, /* [0-9a-zA-Z_-:.]+ */
15 SGML_TOKEN_TAG_END
, /* > or ?> */
16 SGML_TOKEN_STRING
, /* Char sequence delimited by matching ' or " */
18 /* High level string tokens: */
20 SGML_TOKEN_NOTATION
, /* <!{ident} until > */
21 SGML_TOKEN_NOTATION_COMMENT
, /* <!-- until --> */
22 SGML_TOKEN_NOTATION_DOCTYPE
, /* <!DOCTYPE until > */
23 SGML_TOKEN_NOTATION_ELEMENT
, /* <!ELEMENT until > */
24 SGML_TOKEN_NOTATION_ENTITY
, /* <!ENTITY until > */
25 SGML_TOKEN_NOTATION_ATTLIST
, /* <!ATTLIST until > */
27 SGML_TOKEN_CDATA_SECTION
, /* <![CDATA[ until ]]> */
29 SGML_TOKEN_PROCESS
, /* <?{ident} */
30 SGML_TOKEN_PROCESS_XML
, /* <?xml */
31 SGML_TOKEN_PROCESS_XML_STYLESHEET
,/* <?xml-stylesheet */
32 SGML_TOKEN_PROCESS_DATA
, /* data after <?{ident} until ?> */
34 SGML_TOKEN_ELEMENT
, /* <{ident}> */
35 SGML_TOKEN_ELEMENT_BEGIN
, /* <{ident} */
36 SGML_TOKEN_ELEMENT_END
, /* </{ident}> or </> */
37 SGML_TOKEN_ELEMENT_EMPTY_END
, /* /> */
38 SGML_TOKEN_ATTRIBUTE
, /* [^>\t\r\n\f\v ]+ */
40 SGML_TOKEN_ENTITY
, /* &ident; */
42 SGML_TOKEN_TEXT
, /* [^<&]+ */
43 SGML_TOKEN_SPACE
, /* [\t\r\n\f\v ]+ */
47 /* A special token for unrecognized strings */
/* A special token for marking that it is assumed that the token is
 * not complete. Only meaningful if scanner->complete is incomplete. */
52 SGML_TOKEN_INCOMPLETE
,
54 /* A special token for reporting that an error in the markup was found.
55 * Only in effect when error checking has been requested. */
/* Token type used internally when scanning to signal that the token
 * should not be recorded in the scanner's token table. */
/* Another internal token type used both to mark unused tokens in the
 * scanner table as invalid and, when scanning, to signal that the
 * scanning should end. */
/* The SGML tokenizer maintains a state (in the scanner->state member) that can
 * be either text, element, or processing instruction state. The state is only
 * meaningful while doing the actual scanning and should not be used at
 * parsing time. It can however be used to initialize the scanner to a specific
 * state. */
73 enum sgml_scanner_state
{
/* Scanner description for SGML input. Only declared here; the definition is
 * expected to live in another translation unit.
 * NOTE(review): struct dom_scanner_info is presumably provided by
 * "dom/scanner.h" -- confirm. */
extern struct dom_scanner_info sgml_scanner_info;
/* Map a token type to the precedence value that skip_sgml_tokens() hands to
 * skip_dom_scanner_tokens().
 *
 * Treat '<' as more valuable than '>' so that scanning of '<a<b>' using
 * skipping to next '>' will stop at the second '<'.
 *
 * Evaluates to (1 << 11) for '<', (1 << 10) for '>' and 0 for every other
 * token type. The argument is expanded more than once, so do not pass an
 * expression with side effects. */
#define get_sgml_precedence(token_type) \
	((token_type) == '<' ? (1 << 11) : \
	 (token_type) == '>' ? (1 << 10) : 0)
/* Convenience wrapper around skip_dom_scanner_tokens(): forwards scanner and
 * type unchanged and supplies get_sgml_precedence(type) as the third
 * argument. The type argument is expanded more than once, so do not pass an
 * expression with side effects.
 * NOTE(review): presumably skips ahead until a token of the given type is
 * found -- confirm against skip_dom_scanner_tokens(). */
#define skip_sgml_tokens(scanner, type) \
	skip_dom_scanner_tokens(scanner, type, get_sgml_precedence(type))