1 ! Copyright (C) 2008 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
4 USING: kernel namespaces make xmode.rules xmode.tokens
5 xmode.marker.state xmode.marker.context xmode.utilities
6 xmode.catalog sequences math assocs combinators strings
7 parser-combinators.regexp splitting parser-combinators ascii
8 ascii combinators.short-circuit accessors ;
10 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
12 : current-keyword ( -- string )
13 last-offset get position get line get subseq ;
15 : keyword-number? ( keyword -- ? )
17 [ current-rule-set highlight-digits?>> ]
18 [ dup [ digit? ] any? ]
21 current-rule-set digit-re>>
22 dup [ dupd matches? ] [ drop f ] if
27 : mark-number ( keyword -- id )
28 keyword-number? DIGIT and ;
30 : mark-keyword ( keyword -- id )
31 current-rule-set keywords>> at ;
33 : add-remaining-token ( -- )
34 current-rule-set default>> prev-token, ;
38 dup mark-number [ ] [ mark-keyword ] ?if
39 [ prev-token, ] when* ;
41 : current-char ( -- char )
42 position get line get nth ;
44 GENERIC: match-position ( rule -- n )
46 M: mark-previous-rule match-position drop last-offset get ;
48 M: rule match-position drop position get ;
50 : can-match-here? ( matcher rule -- ? )
53 [ over at-line-start?>> over zero? implies ]
54 [ over at-whitespace-end?>> over whitespace-end get = implies ]
55 [ over at-word-start?>> over last-offset get = implies ]
58 : rest-of-line ( -- str )
59 line get position get tail-slice ;
61 GENERIC: text-matches? ( string text -- match-count/f )
66 M: string-matcher text-matches?
68 [ string>> ] [ ignore-case?>> ] bi string-head?
69 ] keep string>> length and ;
71 M: regexp text-matches?
72 [ >string ] dip match-head ;
74 : rule-start-matches? ( rule -- match-count/f )
75 dup start>> tuck swap can-match-here? [
76 rest-of-line swap text>> text-matches?
81 : rule-end-matches? ( rule -- match-count/f )
82 dup mark-following-rule? [
83 dup start>> swap can-match-here? 0 and
85 dup end>> tuck swap can-match-here? [
87 swap text>> context get end>> or
96 : get-always-rules ( vector/f ruleset -- vector/f )
97 f swap rules>> at ?push-all ;
99 : get-char-rules ( vector/f char ruleset -- vector/f )
100 [ ch>upper ] dip rules>> at ?push-all ;
102 : get-rules ( char ruleset -- seq )
103 [ f ] 2dip [ get-char-rules ] keep get-always-rules ;
105 GENERIC: handle-rule-start ( match-count rule -- )
107 GENERIC: handle-rule-end ( match-count rule -- )
109 : find-escape-rule ( -- rule )
111 in-rule-set>> escape-rule>> [ ] [
112 parent>> in-rule-set>>
113 dup [ escape-rule>> ] when
116 : check-escape-rule ( rule -- ? )
118 find-escape-rule dup [
119 dup rule-start-matches? dup [
120 swap handle-rule-start
121 delegate-end-escaped? [ not ] change
129 : check-every-rule ( -- ? )
130 current-char current-rule-set get-rules
131 [ rule-start-matches? ] map-find
132 dup [ handle-rule-start t ] [ 2drop f ] if ;
136 dup rule-end-matches?
137 dup [ swap handle-rule-end ] [ 2drop ] if
140 : rule-match-token* ( rule -- id )
142 { f [ dup body-token>> ] }
143 { t [ current-rule-set default>> ] }
147 M: escape-rule handle-rule-start
150 process-escape? get [
151 escaped? [ not ] change
152 position [ + ] change
155 M: seq-rule handle-rule-start
159 tuck body-token>> next-token,
160 delegate>> [ push-context ] when* ;
162 UNION: abstract-span-rule span-rule eol-span-rule ;
164 M: abstract-span-rule handle-rule-start
168 tuck rule-match-token* next-token,
170 dup context get (>>in-rule)
171 delegate>> push-context ;
173 M: span-rule handle-rule-end
176 M: mark-following-rule handle-rule-start
178 mark-token add-remaining-token
179 tuck rule-match-token* next-token,
180 f context get (>>end)
181 context get (>>in-rule) ;
183 M: mark-following-rule handle-rule-end
184 nip rule-match-token* prev-token,
185 f context get (>>in-rule) ;
187 M: mark-previous-rule handle-rule-start
190 dup body-token>> prev-token,
191 rule-match-token* next-token, ;
199 : check-end-delegate ( -- ? )
200 context get parent>> [
202 dup rule-end-matches? dup [
208 ] keep context get parent>> in-rule>>
209 rule-match-token* next-token,
211 seen-whitespace-end? on t
212 ] [ drop check-escape-rule ] if
216 : handle-no-word-break ( -- )
217 context get parent>> [
219 dup no-word-break?>> [
220 rule-match-token* prev-token,
230 add-remaining-token ;
232 : (check-word-break) ( -- )
235 1 current-rule-set default>> next-token, ;
237 : rule-set-empty? ( ruleset -- ? )
238 [ rules>> ] [ keywords>> ] bi
239 [ assoc-empty? ] bi@ and ;
241 : check-word-break ( -- ? )
242 current-char dup blank? [
245 seen-whitespace-end? get [
246 position get 1+ whitespace-end set
252 ! Micro-optimization with incorrect semantics; we keep
253 ! it here because jEdit mode files depend on it now...
254 current-rule-set rule-set-empty? [
260 current-rule-set rule-set-no-word-sep* member? [
266 seen-whitespace-end? on
269 delegate-end-escaped? off t ;
272 : mark-token-loop ( -- )
273 position get line get length < [
275 [ check-end-delegate ]
284 : mark-remaining ( -- )
285 line get length position set
288 : unwind-no-line-break ( -- )
289 context get parent>> [
298 : tokenize-line ( line-context line rules -- line-context' seq )