1 ! Copyright (C) 2008 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: unicode.data kernel math sequences parser lexer
4 bit-arrays namespaces make sequences.private arrays quotations
5 assocs classes.predicate math.order strings.parser ;
8 ! Character classes (categories)
10 : category# ( char -- category )
11 ! There are a few characters that should be Cn
12 ! that this gives Cf or Mn
13 ! Cf = 26; Mn = 5; Cn = 29
14 ! Use a compressed array instead?
15 dup category-map ?nth [ ] [
16 dup HEX: E0001 HEX: E007F between?
18 HEX: E0100 HEX: E01EF between? 5 29 ?
22 : category ( char -- category )
23 category# categories nth ;
25 : >category-array ( categories -- bitarray )
26 categories [ swap member? ] with map >bit-array ;
28 : as-string ( strings -- bit-array )
29 concat unescape-string ;
31 : [category] ( categories -- quot )
33 [ [ categories member? not ] filter as-string ] keep
34 [ categories member? ] filter >category-array
35 [ dup category# ] % , [ nth-unsafe [ drop t ] ] %
36 \ member? 2array >quotation ,
40 : define-category ( word categories -- )
41 [category] integer swap define-predicate-class ;
44 CREATE ";" parse-tokens define-category ; parsing
46 : seq-minus ( seq1 seq2 -- diff )
47 [ member? not ] curry filter ;
50 CREATE ";" parse-tokens
51 categories swap seq-minus define-category ; parsing