1 USING: help.syntax help.markup strings ;
! Overview article for the unicode.data vocabulary. Each $subsection element
! links to the help entry of one documented word.
ARTICLE: "unicode.data" "Unicode data tables"
"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing them from files."
{ $subsection load-script }
{ $subsection canonical-entry }
{ $subsection compatibility-entry }
{ $subsection combine-chars }
{ $subsection combining-class }
{ $subsection non-starter? }
{ $subsection name>char }
{ $subsection char>name }
{ $subsection property? } ;
18 { $values { "filename" string } { "table" "an interval map" } }
19 { $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
22 { $values { "char" "a code point" } { "seq" string } }
{ $description "Finds the canonical decomposition (NFD) for a code point." } ;
{ $values { "a" "a code point" } { "b" "a code point" } { "char/f" "a code point or " { $link f } } }
{ $description "If " { $snippet "a" } " followed by " { $snippet "b" } " can be combined in NFC, this returns the code point of their combination; otherwise it returns " { $link f } "." } ;
! Help entry for compatibility-entry: stack effect values and a one-line description.
HELP: compatibility-entry
{ $values { "char" "a code point" } { "seq" string } }
{ $description "This returns the compatibility decomposition (NFKD) for a code point." } ;
34 { $values { "char" "a code point" } { "n" "an integer" } }
35 { $description "Finds the combining class of a code point." } ;
38 { $values { "char" "a code point" } { "?" "a boolean" } }
{ $description "Returns true if the code point is a non-starter, that is, if it has a non-zero combining class." } ;
42 { $values { "char" "a code point" } { "name" string } }
{ $description "Looks up the name of a given code point. Warning: to save space, this lookup is not optimized for speed." } ;
46 { $values { "name" string } { "char" "a code point" } }
47 { $description "Looks up the code point corresponding to a given name." } ;
50 { $values { "char" "a code point" } { "property" string } { "?" "a boolean" } }
51 { $description "Tests whether the code point is listed under the given property in PropList.txt in the Unicode Character Database." } ;