1 ! Copyright (C) 2008 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays html.parser.utils hashtables io kernel
4 namespaces make prettyprint quotations sequences splitting
5 html.parser.state strings unicode.categories unicode.case ;
8 TUPLE: tag name attributes text closing? ;
18 : closing-tag? ( string -- ? )
20 [ [ first ] [ peek ] bi [ CHAR: / = ] bi@ or ] if-empty ;
22 : <tag> ( name attributes closing? -- tag )
28 : make-tag ( string attribs -- tag )
29 [ [ closing-tag? ] keep "/" trim1 ] dip rot <tag> ;
31 : make-text-tag ( string -- tag )
36 : make-comment-tag ( string -- tag )
41 : make-dtd-tag ( string -- tag )
46 : read-whitespace ( -- string )
47 [ get-char blank? not ] take-until ;
49 : read-whitespace* ( -- ) read-whitespace drop ;
51 : read-token ( -- string )
53 [ get-char blank? ] take-until ;
55 : read-single-quote ( -- string )
56 [ get-char CHAR: ' = ] take-until ;
58 : read-double-quote ( -- string )
59 [ get-char CHAR: " = ] take-until ;
61 : read-quote ( -- string )
62 get-char next CHAR: ' =
63 [ read-single-quote ] [ read-double-quote ] if next ;
65 : read-key ( -- string )
67 [ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ;
71 [ get-char CHAR: = = ] take-until drop next ;
73 : read-value ( -- string )
75 get-char quote? [ read-quote ] [ read-token ] if
79 "-->" take-string make-comment-tag push-tag ;
82 ">" take-string make-dtd-tag push-tag ;
85 next get-char CHAR: - = get-next CHAR: - = and [
92 : read-tag ( -- string )
93 [ get-char CHAR: > = get-char CHAR: < = or ] take-until
94 get-char CHAR: < = [ next ] unless ;
96 : read-< ( -- string )
97 next get-char CHAR: ! = [
103 : read-until-< ( -- string )
104 [ get-char CHAR: < = ] take-until ;
108 make-text-tag push-tag
111 : (parse-attributes) ( -- )
114 read-key >lower read-= read-value
115 2array , (parse-attributes)
118 : parse-attributes ( -- hashtable )
119 [ (parse-attributes) ] { } make >hashtable ;
121 : (parse-tag) ( string -- string' hashtable )
129 (parse-tag) make-tag push-tag
132 : (parse-html) ( -- )
139 : tag-parse ( quot -- vector )
140 V{ } clone tagstack [ string-parse ] with-variable ;
142 : parse-html ( string -- vector )
143 [ (parse-html) tagstack get ] tag-parse ;