iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / dom / sgml / scanner.h
blob7c3edcfd81bf685490dce766f5bcf97495566291
1 #ifndef EL_DOM_SGML_SCANNER_H
2 #define EL_DOM_SGML_SCANNER_H
4 #include "dom/scanner.h"
6 enum sgml_token_type {
7 /* Char tokens: */
9 /* Char tokens range from 1 to 255 and have their char value as type, */
10 /* meaning non-char tokens have values from 256 and up. */
12 /* Low level string tokens: */
14 SGML_TOKEN_IDENT = 256, /* [0-9a-zA-Z_-:.]+ */
15 SGML_TOKEN_TAG_END, /* > or ?> */
16 SGML_TOKEN_STRING, /* Char sequence delimited by matching ' or " */
18 /* High level string tokens: */
20 SGML_TOKEN_NOTATION, /* <!{ident} until > */
21 SGML_TOKEN_NOTATION_COMMENT, /* <!-- until --> */
22 SGML_TOKEN_NOTATION_DOCTYPE, /* <!DOCTYPE until > */
23 SGML_TOKEN_NOTATION_ELEMENT, /* <!ELEMENT until > */
24 SGML_TOKEN_NOTATION_ENTITY, /* <!ENTITY until > */
25 SGML_TOKEN_NOTATION_ATTLIST, /* <!ATTLIST until > */
27 SGML_TOKEN_CDATA_SECTION, /* <![CDATA[ until ]]> */
29 SGML_TOKEN_PROCESS, /* <?{ident} */
30 SGML_TOKEN_PROCESS_XML, /* <?xml */
31 SGML_TOKEN_PROCESS_XML_STYLESHEET,/* <?xml-stylesheet */
32 SGML_TOKEN_PROCESS_DATA, /* data after <?{ident} until ?> */
34 SGML_TOKEN_ELEMENT, /* <{ident}> */
35 SGML_TOKEN_ELEMENT_BEGIN, /* <{ident} */
36 SGML_TOKEN_ELEMENT_END, /* </{ident}> or </> */
37 SGML_TOKEN_ELEMENT_EMPTY_END, /* /> */
38 SGML_TOKEN_ATTRIBUTE, /* [^>\t\r\n\f\v ]+ */
40 SGML_TOKEN_ENTITY, /* &ident; */
42 SGML_TOKEN_TEXT, /* [^<&]+ */
43 SGML_TOKEN_SPACE, /* [\t\r\n\f\v ]+ */
45 /* Special tokens: */
47 /* A special token for unrecognized strings */
48 SGML_TOKEN_GARBAGE,
50 /* A special token for marking that it is assumed that the token is
51 * not complete. Only meaningful if scanner->complete is incomplete. */
52 SGML_TOKEN_INCOMPLETE,
54 /* A special token for reporting that an error in the markup was found.
55 * Only in effect when error checking has been requested. */
56 SGML_TOKEN_ERROR,
58 /* Token type used internally when scanning to signal that the token
59 * should not be recorded in the scanner's token table. */
60 SGML_TOKEN_SKIP,
62 /* Another internal token type used both to mark unused tokens in the
63 * scanner table as invalid and, when scanning, to signal that the
64 * scanning should end. */
65 SGML_TOKEN_NONE = 0,
68 /* The SGML tokenizer maintains a state (in the scanner->state member) that can
69 * be either text, element, or processing instruction state. The state only has
70 * meaning while doing the actual scanning and should not be used at
71 * parsing time. It can however be used to initialize the scanner to a specific
72 * state. */
73 enum sgml_scanner_state {
74 SGML_STATE_TEXT,
75 SGML_STATE_ELEMENT,
76 SGML_STATE_PROC_INST,
79 extern struct dom_scanner_info sgml_scanner_info;
81 /* Treat '<' as more valuable than '>' so that scanning of '<a<b>' by
82 * skipping to the next '>' will stop at the second '<'. Other types get 0. */
83 #define get_sgml_precedence(token_type) \
84 ((token_type) == '<' ? (1 << 11) : \
85 (token_type) == '>' ? (1 << 10) : 0)
/* Expands to skip_dom_scanner_tokens() for the given token type, passing the
 * precedence that get_sgml_precedence() yields for that type. The `type`
 * argument is expanded more than once, so it should be free of side effects. */
87 #define skip_sgml_tokens(scanner, type) \
88 skip_dom_scanner_tokens(scanner, type, get_sgml_precedence(type))
90 #endif