2 # Copyright (C) 2002-2009, International Business Machines Corporation and others.
7 # ICU Character Break Rules, also known as Grapheme Cluster Boundaries
8 # See Unicode Standard Annex #29.
9 # These rules are based on TR29 Revision 13, for Unicode Version 5.1
13 # Character Class Definitions.
# Base character classes. Each variable is the set of code points whose
# Grapheme_Cluster_Break property has the named value.
#   $CR, $LF, $Control        - excluded (via [^...]) from the context of the
#                               Extend / SpacingMark / Prepend rules below.
#   $Prepend, $Extend,
#   $SpacingMark              - the classes those rules attach to a neighbour.
15 $CR = [\p{Grapheme_Cluster_Break = CR}];
16 $LF = [\p{Grapheme_Cluster_Break = LF}];
17 $Control = [\p{Grapheme_Cluster_Break = Control}];
18 $Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
19 $Extend = [\p{Grapheme_Cluster_Break = Extend}];
20 $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
# Per-script letter sets and virama signs, written as explicit code point
# ranges rather than property lookups. Each $<Script>Letter set is paired with
# a single-code-point $<Script>SignVirama; the pairs are consumed by the
# "Letter (Virama Letter?)+" rules further down.
# NOTE(review): the ranges are taken as-is; whether every code point inside a
# range is an assigned letter of that script has not been checked here.
21 $BengaliLetter = [\u0985-\u09B9 \u09CE \u09DC-\u09E1 \u09F0-\u09F1];
22 $BengaliSignVirama = \u09CD;
23 $GujaratiLetter = [\u0A85-\u0A8C \u0A8F-\u0A90 \u0A93-\u0AB9 \u0AE0-\u0AE1];
24 $GujaratiSignVirama = \u0ACD;
25 $DevanagariLetter = [\u0904-\u0939 \u0958-\u0961 \u0972-\u097F];
26 $DevanagariSignVirama = \u094D;
27 $KannadaLetter = [\u0C85-\u0CB9 \u0CDE-\u0CE1];
28 $KannadaSignVirama = \u0CCD;
29 $MalayalamLetter = [\u0D05-\u0D39 \u0D60-\u0D61 \u0D7A-\u0D7F];
30 $MalayalamSignVirama = \u0D4D;
31 $OdiaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
32 $OdiaSignVirama = \u0B4D;
33 $GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
34 $GurmukhiSignVirama = \u0A4D;
# Tamil has no general letter set here: only the virama is defined, and the
# Tamil rules below match one specific sequence using $TamilKa and $TamilSsa.
# NOTE(review): the definitions of $TamilKa and $TamilSsa are not visible in
# this excerpt - confirm they are defined before the rules that use them.
36 $TamilSignVirama = \u0BCD;
38 $TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
39 $TeluguSignVirama = \u0C4D;
42 # Korean Syllable Definitions
# Hangul classes, one per Grapheme_Cluster_Break value: L, V, T, LV and LVT.
# NOTE(review): in UAX #29 these are presumably the leading / vowel / trailing
# jamo and the precomposed LV / LVT syllables - confirm against the annex.
# Of these, $T is not referenced by any rule visible in this excerpt.
44 $L = [\p{Grapheme_Cluster_Break = L}];
45 $V = [\p{Grapheme_Cluster_Break = V}];
46 $T = [\p{Grapheme_Cluster_Break = T}];
48 $LV = [\p{Grapheme_Cluster_Break = LV}];
49 $LVT = [\p{Grapheme_Cluster_Break = LVT}];
52 ## -------------------------------------------------
# Rule patterns, text read left to right.
# NOTE(review): presumably the forward-rule section (each pattern describing a
# sequence with no boundary inside it); the section directive itself is not
# visible in this excerpt - confirm.
#
# Indic conjuncts: a letter followed by one or more groups of
# (virama, optional letter) from the same script.
59 $BengaliLetter ($BengaliSignVirama $BengaliLetter?)+;
60 $GujaratiLetter ($GujaratiSignVirama $GujaratiLetter?)+;
61 $DevanagariLetter ($DevanagariSignVirama $DevanagariLetter?)+;
62 $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
63 $MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
64 $OdiaLetter ($OdiaSignVirama $OdiaLetter?)+;
65 $GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
# Tamil: only the exact three-element sequence Ka, virama, Ssa is matched.
# NOTE(review): $TamilKa / $TamilSsa are defined outside the visible lines.
66 $TamilKa $TamilSignVirama $TamilSsa;
67 $TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
# Hangul: an $L followed by another $L, a $V, an $LV or an $LVT.
69 $L ($L | $V | $LV | $LVT);
# Any character other than Control, CR or LF, followed by an Extend.
73 [^$Control $CR $LF] $Extend;
# Any character other than Control, CR or LF, followed by a SpacingMark.
75 [^$Control $CR $LF] $SpacingMark;
# A Prepend followed by any character other than Control, CR or LF.
76 $Prepend [^$Control $CR $LF];
79 ## -------------------------------------------------
# Mirror images of the rule patterns above: every pattern lists the same
# elements with their order reversed.
# NOTE(review): presumably the reverse-rule section, matched while moving
# backwards through the text; the section directive is not visible in this
# excerpt - confirm.
#
# Indic conjuncts, mirrored: one or more (optional letter, virama) groups,
# then a letter of the same script.
83 ($BengaliLetter? $BengaliSignVirama)+ $BengaliLetter;
84 ($GujaratiLetter? $GujaratiSignVirama)+ $GujaratiLetter;
85 ($DevanagariLetter? $DevanagariSignVirama)+ $DevanagariLetter;
86 ($KannadaLetter? $KannadaSignVirama)+ $KannadaLetter;
87 ($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
88 ($OdiaLetter? $OdiaSignVirama)+ $OdiaLetter;
89 ($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
# Tamil, mirrored: Ssa, virama, Ka.
# NOTE(review): $TamilKa / $TamilSsa are defined outside the visible lines.
90 $TamilSsa $TamilSignVirama $TamilKa;
91 ($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
# Hangul, mirrored: an $L, $V, $LV or $LVT followed by an $L.
92 ($L | $V | $LV | $LVT) $L;
# Extend / SpacingMark, mirrored: the mark comes first, then any character
# other than Control, CR or LF.
96 $Extend [^$Control $CR $LF];
97 $SpacingMark [^$Control $CR $LF];
# Prepend, mirrored: any character other than Control, CR or LF, then Prepend.
98 [^$Control $CR $LF] $Prepend;
101 ## -------------------------------------------------
106 ## -------------------------------------------------