update emoji autocorrect entries from po-files
[LibreOffice.git] / i18npool / source / breakiterator / data / char_in.txt
blob414259524302ee224a113b0913e5828d36a7c724
2 #   Copyright (C) 2002-2009, International Business Machines Corporation and others.
3 #       All Rights Reserved.
5 #   file:  char_in.txt
7 #   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
8 #      See Unicode Standard Annex #29.
9 #      These rules are based on TR29 Revision 13, for Unicode Version 5.1
13 #  Character Class Definitions.
# Base character classes. Each variable is the set of all characters whose
# Unicode Grapheme_Cluster_Break property has the value named on the right.
# They are referenced by the forward and reverse rules further down.
15 $CR          = [\p{Grapheme_Cluster_Break = CR}];
16 $LF          = [\p{Grapheme_Cluster_Break = LF}];
17 $Control     = [\p{Grapheme_Cluster_Break = Control}];
18 $Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
19 $Extend      = [\p{Grapheme_Cluster_Break = Extend}];
20 $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
# Per-script classes for the virama (conjunct) rules below.
# Each <Script>Letter set is written as explicit code point ranges, and each
# <Script>SignVirama is a single code point. These do not use the
# Grapheme_Cluster_Break property; they are hand-listed per script.
# NOTE(review): the Letter ranges are contiguous spans, so they also cover any
# unassigned code points lying inside a span - confirm that is intended.
21 $BengaliLetter = [\u0985-\u09B9 \u09CE \u09DC-\u09E1 \u09F0-\u09F1];
22 $BengaliSignVirama = \u09CD;
23 $GujaratiLetter = [\u0A85-\u0A8C \u0A8F-\u0A90 \u0A93-\u0AB9 \u0AE0-\u0AE1];
24 $GujaratiSignVirama = \u0ACD;
25 $DevanagariLetter = [\u0904-\u0939 \u0958-\u0961 \u0972-\u097F];
26 $DevanagariSignVirama = \u094D;
27 $KannadaLetter = [\u0C85-\u0CB9 \u0CDE-\u0CE1];
28 $KannadaSignVirama = \u0CCD;
29 $MalayalamLetter = [\u0D05-\u0D39 \u0D60-\u0D61 \u0D7A-\u0D7F];
30 $MalayalamSignVirama = \u0D4D;
31 $OdiaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
32 $OdiaSignVirama = \u0B4D;
33 $GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
34 $GurmukhiSignVirama = \u0A4D;
# Tamil has no general letter set here: only three single code points are
# defined, and the rules visible in this file use them solely for the fixed
# sequence Ka + Virama + Ssa.
35 $TamilKa = \u0B95;
36 $TamilSignVirama = \u0BCD;
37 $TamilSsa = \u0BB7;
38 $TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
39 $TeluguSignVirama = \u0C4D;
42 # Korean Syllable Definitions
# Hangul classes, again taken from the Grapheme_Cluster_Break property:
# L, V and T are the conjoining jamo classes (leading consonant, vowel,
# trailing consonant); LV and LVT are the precomposed syllable classes.
44 $L       = [\p{Grapheme_Cluster_Break = L}];
45 $V       = [\p{Grapheme_Cluster_Break = V}];
46 $T       = [\p{Grapheme_Cluster_Break = T}];
48 $LV      = [\p{Grapheme_Cluster_Break = LV}];
49 $LVT     = [\p{Grapheme_Cluster_Break = LVT}];
52 ## -------------------------------------------------
# Rule-set option: enable ICU rule chaining, so a match may continue from the
# end of one rule into the start of another.
53 !!chain;
# Start of the forward rules. Each rule below describes a sequence that is
# matched as a unit, i.e. with no boundary inside it.
55 !!forward;
# CR followed by LF stays together.
57 $CR $LF;
# Indic conjuncts: a letter followed by one or more groups of
# (virama, optional letter) of the same script is matched as one unit.
# Because the trailing letter is optional, a sequence ending in a virama
# is matched too.
59 $BengaliLetter ($BengaliSignVirama $BengaliLetter?)+;
60 $GujaratiLetter ($GujaratiSignVirama $GujaratiLetter?)+;
61 $DevanagariLetter ($DevanagariSignVirama $DevanagariLetter?)+;
62 $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
63 $MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
64 $OdiaLetter ($OdiaSignVirama $OdiaLetter?)+;
65 $GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
# Tamil: only the exact three-character sequence Ka, Virama, Ssa is matched.
66 $TamilKa $TamilSignVirama $TamilSsa;
67 $TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
# Hangul syllable sequences:
#   L followed by L, V, LV or LVT;
#   LV or V followed by V or T;
#   LVT or T followed by T.
69 $L ($L | $V | $LV | $LVT);
70 ($LV | $V) ($V | $T);
71 ($LVT | $T) $T;
# Any character other than Control, CR or LF followed by an Extend character.
73 [^$Control $CR $LF] $Extend;
# Same, for a following SpacingMark character.
75 [^$Control $CR $LF] $SpacingMark;
# A Prepend character followed by any character other than Control, CR or LF.
76 $Prepend [^$Control $CR $LF];
79 ## -------------------------------------------------
# Start of the reverse rules. Each rule is the corresponding forward rule with
# its operands written in the opposite order, so the same sequences are
# matched when the text is scanned backwards.
81 !!reverse;
# LF then CR: mirror of the forward "CR LF" rule.
82 $LF $CR;
# Indic conjuncts, mirrored: one or more groups of (optional letter, virama)
# followed by the letter that begins the sequence in text order.
83 ($BengaliLetter? $BengaliSignVirama)+ $BengaliLetter;
84 ($GujaratiLetter? $GujaratiSignVirama)+ $GujaratiLetter;
85 ($DevanagariLetter? $DevanagariSignVirama)+ $DevanagariLetter;
86 ($KannadaLetter? $KannadaSignVirama)+ $KannadaLetter;
87 ($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
88 ($OdiaLetter? $OdiaSignVirama)+ $OdiaLetter;
89 ($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
# Tamil: Ssa, Virama, Ka - the forward sequence read backwards.
90 $TamilSsa $TamilSignVirama $TamilKa;
91 ($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
# Hangul syllable sequences, mirrored from the three forward Hangul rules.
92 ($L | $V | $LV | $LVT) $L;
93 ($V | $T) ($LV | $V);
94 $T ($LVT | $T);
# Extend / SpacingMark preceded (in text order) by a character other than
# Control, CR or LF.
96 $Extend      [^$Control $CR $LF];
97 $SpacingMark [^$Control $CR $LF];
# A character other than Control, CR or LF preceded (in text order) by Prepend.
98 [^$Control $CR $LF] $Prepend;
101 ## -------------------------------------------------
# Safe-reverse section header. No rules follow it in the text visible here.
103 !!safe_reverse;
106 ## -------------------------------------------------
# Safe-forward section header. No rules follow it in the text visible here.
108 !!safe_forward;