Bug 468575 - Scrape some gunk off the config/ grout, r=ted
[wine-gecko.git] / intl / lwbrk / src / jisx4051pairtable.txt
blob39ae7e1096f83e3f3d9b13f93302d72aa96f74e9
4 /* 
6    Simplification of Pair Table in JIS X 4051
8    1. The Original Table - in 4.1.3
10    In JIS X 4051, the pair table is defined as below
12    Class of
13    Leading    Class of Trailing Char Class
14    Char        
16               1  2  3  4  5  6  7  8  9 10 11 12 13 13 14 14 15 16 17 18 19 20
17                                                  *  #  *  #
18         1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  E
19         2        X  X  X  X  X                                               X
20         3        X  X  X  X  X                                               X
21         4        X  X  X  X  X                                               X
22         5        X  X  X  X  X                                               X
23         6        X  X  X  X  X                                               X
24         7        X  X  X  X  X  X                                            X 
25         8        X  X  X  X  X                                X              E 
26         9        X  X  X  X  X                                               X
27        10        X  X  X  X  X                                               X
28        11        X  X  X  X  X                                               X
29        12        X  X  X  X  X                                               X  
30        13        X  X  X  X  X                    X                          X
31        14        X  X  X  X  X                          X                    X
32        15        X  X  X  X  X        X                       X        X     X 
33        16        X  X  X  X  X                                   X     X     X
34        17        X  X  X  X  X                                               E 
35        18        X  X  X  X  X                                X  X     X     X 
36        19     X  E  E  E  E  E  X  X  X  X  X  X  X  X  X  X  X  X  E  X  E  E
37        20        X  X  X  X  X                                               E
39    * Same Char
40    # Other Char
42    2. Simplify by removing the classes which we do not care about
44    However, since we do not care about class 13(Subscript), 14(Ruby), 
45    19(split line note begin quote), and 20(split line note end quote) 
46    we can simplify this pair table into the following 
48    Class of
49    Leading    Class of Trailing Char Class
50    Char        
52               1  2  3  4  5  6  7  8  9 10 11 12 15 16 17 18 
53                                                  
54         1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
55         2        X  X  X  X  X                             
56         3        X  X  X  X  X                            
57         4        X  X  X  X  X                           
58         5        X  X  X  X  X                          
59         6        X  X  X  X  X                         
60         7        X  X  X  X  X  X                      
61         8        X  X  X  X  X                    X    
62         9        X  X  X  X  X                                   
63        10        X  X  X  X  X                                  
64        11        X  X  X  X  X                                 
65        12        X  X  X  X  X                                
66        15        X  X  X  X  X        X           X        X    
67        16        X  X  X  X  X                       X     X    
68        17        X  X  X  X  X                                  
69        18        X  X  X  X  X                    X  X     X    
71    3. Simplify by merging classes
73    After simplification 2, the pair table has some duplication 
74    a. classes 2, 3, 4, 5, 6 are the same - we can merge them
75    b. classes 10, 11, 12, 17 are the same - we can merge them
78    Class of
79    Leading    Class of Trailing Char Class
80    Char        
82               1 [a] 7  8  9 [b]15 16 18 
83                                      
84         1     X  X  X  X  X  X  X  X  X
85       [a]        X                             
86         7        X  X                      
87         8        X              X    
88         9        X                                   
89       [b]        X                                  
90        15        X        X     X     X    
91        16        X                 X  X    
92        18        X              X  X  X    
95    4. Now we use one bit to encode whether it is breakable, and use 2 bytes
96       for one row, then the bit table will look like:
98                  18    <-   1
99             
100        1  0000 0001 1111 1111  = 0x01FF
101       [a] 0000 0000 0000 0010  = 0x0002
102        7  0000 0000 0000 0110  = 0x0006
103        8  0000 0000 0100 0010  = 0x0042
104        9  0000 0000 0000 0010  = 0x0002
105       [b] 0000 0000 0000 0010  = 0x0002
106       15  0000 0001 0101 0010  = 0x0152
107       16  0000 0001 1000 0010  = 0x0182
108       18  0000 0001 1100 0010  = 0x01C2
112 static PRUint16 gJISx4051SimplifiedPair[9] = {
113   0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0042, 0x0152, 0x0182, 0x01C2
116 PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1)
118   NS_ASSERTION( (aCls1 < 9) "invalid class");
119   NS_ASSERTION( (aCls2 < 9) "invalid class");
120   return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
// True when u lies in the inclusive range 0x0030..0x0039 (the ASCII digits
// '0' through '9'). The argument is evaluated twice, so it must be free of
// side effects.
#define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
126 nsJISx4051Cls XXXX::GetClass(
127    PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
129    // take care the special case in cls 15
130    if( ((0x2C == aChar) || (0x2E == aChar)) &&
131        (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter)))
132    {
133      return kJISx4051Cls_15;
134    }
135    
136    nsJISx4051Cls cls;
137    if(gSingle->Lookup(aChar, &cls))
138      return cls;
140    if(gRange->Lookup(aChar, &cls))
141      return cls;
143    return kJISx4051Cls_15;
// JIS X 4051 character classes, numbered by their index in the merged pair
// table. Classes that were merged share one index; classes 13, 14, 19 and 20
// are not represented.
typedef enum {
  // class 1 -> index 0
  kJISx4051Cls_1  = 0,

  // merged group [a] (classes 2..6) -> index 1
  kJISx4051Cls_2  = 1,
  kJISx4051Cls_3  = kJISx4051Cls_2,
  kJISx4051Cls_4  = kJISx4051Cls_2,
  kJISx4051Cls_5  = kJISx4051Cls_2,
  kJISx4051Cls_6  = kJISx4051Cls_2,

  // classes 7, 8, 9 -> indices 2, 3, 4
  kJISx4051Cls_7  = 2,
  kJISx4051Cls_8  = 3,
  kJISx4051Cls_9  = 4,

  // merged group [b] (classes 10, 11, 12, 17) -> index 5
  kJISx4051Cls_10 = 5,
  kJISx4051Cls_11 = kJISx4051Cls_10,
  kJISx4051Cls_12 = kJISx4051Cls_10,
  kJISx4051Cls_17 = kJISx4051Cls_10,

  // classes 15, 16, 18 -> indices 6, 7, 8
  kJISx4051Cls_15 = 6,
  kJISx4051Cls_16 = 7,
  kJISx4051Cls_18 = 8

  // Not defined:
  //   kJISx4051Cls_13, kJISx4051Cls_14, kJISx4051Cls_19, kJISx4051Cls_20
} nsJISx4051Cls;
171   // Table 2
172   YYYY(kJISx4051Cls_1 , 0x0028),
173   YYYY(kJISx4051Cls_1 , 0x005B),
174   YYYY(kJISx4051Cls_1 , 0x007B),
175   YYYY(kJISx4051Cls_1 , 0x2018),
176   YYYY(kJISx4051Cls_1 , 0x201B),
177   YYYY(kJISx4051Cls_1 , 0x201C),
178   YYYY(kJISx4051Cls_1 , 0x201F),
179   YYYY(kJISx4051Cls_1 , 0x3008),
180   YYYY(kJISx4051Cls_1 , 0x300A),
181   YYYY(kJISx4051Cls_1 , 0x300C),
182   YYYY(kJISx4051Cls_1 , 0x300E),
183   YYYY(kJISx4051Cls_1 , 0x3010),
184   YYYY(kJISx4051Cls_1 , 0x3014),
185   YYYY(kJISx4051Cls_1 , 0x3016),
186   YYYY(kJISx4051Cls_1 , 0x3018),
187   YYYY(kJISx4051Cls_1 , 0x301A),
188   YYYY(kJISx4051Cls_1 , 0x301D),
190   // Table 3
191   YYYY(kJISx4051Cls_2 , 0x0029),
192   YYYY(kJISx4051Cls_2 , 0x002C),
193   YYYY(kJISx4051Cls_2 , 0x005D),
194   YYYY(kJISx4051Cls_2 , 0x007D),
195   YYYY(kJISx4051Cls_2 , 0x2019),
196   YYYY(kJISx4051Cls_2 , 0x201A),
197   YYYY(kJISx4051Cls_2 , 0x201D),
198   YYYY(kJISx4051Cls_2 , 0x201E),
199   YYYY(kJISx4051Cls_2 , 0x3001),
200   YYYY(kJISx4051Cls_2 , 0x3009),
201   YYYY(kJISx4051Cls_2 , 0x300B),
202   YYYY(kJISx4051Cls_2 , 0x300D),
203   YYYY(kJISx4051Cls_2 , 0x300F),
204   YYYY(kJISx4051Cls_2 , 0x3011),
205   YYYY(kJISx4051Cls_2 , 0x3015),
206   YYYY(kJISx4051Cls_2 , 0x3017),
207   YYYY(kJISx4051Cls_2 , 0x3019),
208   YYYY(kJISx4051Cls_2 , 0x301B),
209   YYYY(kJISx4051Cls_2 , 0x301E),
210   YYYY(kJISx4051Cls_2 , 0x301F),
212   // Table 4
213   YYYY(kJISx4051Cls_3 , 0x203C),
214   YYYY(kJISx4051Cls_3 , 0x2044),
215   YYYY(kJISx4051Cls_3 , 0x301C),
216   YYYY(kJISx4051Cls_3 , 0x3041),
217   YYYY(kJISx4051Cls_3 , 0x3043),
218   YYYY(kJISx4051Cls_3 , 0x3045),
219   YYYY(kJISx4051Cls_3 , 0x3047),
220   YYYY(kJISx4051Cls_3 , 0x3049),
221   YYYY(kJISx4051Cls_3 , 0x3063),
222   YYYY(kJISx4051Cls_3 , 0x3083),
223   YYYY(kJISx4051Cls_3 , 0x3085),
224   YYYY(kJISx4051Cls_3 , 0x3087),
225   YYYY(kJISx4051Cls_3 , 0x308E),
226   YYYY(kJISx4051Cls_3 , 0x309D),
227   YYYY(kJISx4051Cls_3 , 0x309E),
228   YYYY(kJISx4051Cls_3 , 0x30A1),
229   YYYY(kJISx4051Cls_3 , 0x30A3),
230   YYYY(kJISx4051Cls_3 , 0x30A5),
231   YYYY(kJISx4051Cls_3 , 0x30A7),
232   YYYY(kJISx4051Cls_3 , 0x30A9),
233   YYYY(kJISx4051Cls_3 , 0x30C3),
234   YYYY(kJISx4051Cls_3 , 0x30E3),
235   YYYY(kJISx4051Cls_3 , 0x30E5),
236   YYYY(kJISx4051Cls_3 , 0x30E7),
237   YYYY(kJISx4051Cls_3 , 0x30EE),
238   YYYY(kJISx4051Cls_3 , 0x30F5),
239   YYYY(kJISx4051Cls_3 , 0x30F6),
240   YYYY(kJISx4051Cls_3 , 0x30FC),
241   YYYY(kJISx4051Cls_3 , 0x30FD),
242   YYYY(kJISx4051Cls_3 , 0x30FE),
244   // Table 5
245   YYYY(kJISx4051Cls_4 , 0x0021),
246   YYYY(kJISx4051Cls_4 , 0x003F),
247    
248   // Table 6
249   YYYY(kJISx4051Cls_5 , 0x003A),
250   YYYY(kJISx4051Cls_5 , 0x003B),
251   YYYY(kJISx4051Cls_5 , 0x30FB),
253   // Table 7
254   YYYY(kJISx4051Cls_6 , 0x002E),
255   YYYY(kJISx4051Cls_6 , 0x3002),
257   // Table 8
258   YYYY(kJISx4051Cls_7 , 0x2014),
259   YYYY(kJISx4051Cls_7 , 0x2024),
260   YYYY(kJISx4051Cls_7 , 0x2025),
261   YYYY(kJISx4051Cls_7 , 0x2026),
263   // Table 9
264   YYYY(kJISx4051Cls_8 , 0x0024),
265   YYYY(kJISx4051Cls_8 , 0x00A3),
266   YYYY(kJISx4051Cls_8 , 0x00A5),
267   YYYY(kJISx4051Cls_8 , 0x2116),
269   // Table 10
270   YYYY(kJISx4051Cls_9 , 0x0025),
271   YYYY(kJISx4051Cls_9 , 0x00A2),
272   YYYY(kJISx4051Cls_9 , 0x00B0),
273   YYYY(kJISx4051Cls_9 , 0x2030),
274   YYYY(kJISx4051Cls_9 , 0x2031),
275   YYYY(kJISx4051Cls_9 , 0x2032),
276   YYYY(kJISx4051Cls_9 , 0x2033),
278   // Table 1
279   YYYY(kJISx4051Cls_10, 0x3000),
281   // Table 1
282   ZZZZ(kJISx4051Cls_11, 0x3000),