1 ! Copyright (C) 2008 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: kernel sequences accessors namespaces math words strings
4 io vectors arrays math.parser combinators continuations ;
7 TUPLE: lexer text line line-text line-length column ;
9 : next-line ( lexer -- )
10 dup [ line>> ] [ text>> ] bi ?nth >>line-text
11 dup line-text>> length >>line-length
16 : new-lexer ( text class -- lexer )
20 dup next-line ; inline
22 : <lexer> ( text -- lexer )
25 : skip ( i seq ? -- n )
27 [ swap CHAR: \s eq? xor ] curry find-from drop
30 : change-lexer-column ( lexer quot -- )
32 [ [ column>> ] [ line-text>> ] bi rot call ] keep
35 GENERIC: skip-blank ( lexer -- )
37 M: lexer skip-blank ( lexer -- )
38 [ t skip ] change-lexer-column ;
40 GENERIC: skip-word ( lexer -- )
42 M: lexer skip-word ( lexer -- )
44 2dup nth CHAR: " eq? [ drop 1+ ] [ f skip ] if
45 ] change-lexer-column ;
47 : still-parsing? ( lexer -- ? )
48 [ line>> ] [ text>> ] bi length <= ;
50 : still-parsing-line? ( lexer -- ? )
51 [ column>> ] [ line-length>> ] bi < ;
53 : (parse-token) ( lexer -- str )
61 : parse-token ( lexer -- str/f )
64 dup still-parsing-line?
65 [ (parse-token) ] [ dup next-line parse-token ] if
68 : scan ( -- str/f ) lexer get parse-token ;
70 ERROR: unexpected want got ;
72 PREDICATE: unexpected-eof < unexpected
75 : unexpected-eof ( word -- * ) f unexpected ;
79 [ 2dup = [ 2drop ] [ unexpected ] if ]
83 : (parse-tokens) ( accum end -- accum )
87 [ pick push (parse-tokens) ] [ unexpected-eof ] if*
90 : parse-tokens ( end -- seq )
91 100 <vector> swap (parse-tokens) >array ;
93 TUPLE: lexer-error line column line-text error ;
95 : <lexer-error> ( msg -- error )
100 [ line-text>> >>line-text ]
104 : lexer-dump ( error -- )
105 [ line>> number>string ": " append ]
106 [ line-text>> dup string? [ drop "" ] unless ]
107 [ column>> 0 or ] tri
108 pick length + CHAR: \s <string>
109 [ write ] [ print ] [ write "^" print ] tri* ;
111 : with-lexer ( lexer quot -- newquot )
112 [ lexer set ] dip [ <lexer-error> rethrow ] recover ; inline
114 SYMBOL: lexer-factory
116 [ <lexer> ] lexer-factory set-global