Fix content language message cache (table of contents test depends on this)
[mediawiki.git] / includes / zhtable / Makefile
blob40db8165af45e98c7ac21bbd19c50295c4f64b95
# Creates the file ZhConversion.php used for Simplified/Traditional
# Chinese conversion. It gets the basic conversion table from the Unihan
# database, and constructs the phrase tables using phrase libraries in
# the SCIM packages. There are also special tables used for adjustment.
6 # Some data in the file simp2trad.manual was taken from the following
7 # paper:
# Requirement: you need to set your locale to zh_CN.UTF-8 (or any
# other UTF-8 locale, I suppose)
# Default goal: build the conversion table, the two "not sure" phrase
# lists and the combined word list.
# `all` names a command, not a file, so it is declared phony; otherwise a
# stray file called "all" in this directory would silently disable it.
.PHONY: all
all: ZhConversion.php tradphrases.notsure simpphrases.notsure wordlist
# Download the Unihan archive and unpack it in the current directory.
# The downloaded Unihan.zip is left in place afterwards.
Unihan.txt:
	wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip && \
		unzip Unihan.zip
# Fetch the scim-tables 0.4.3 tarball, copy zh/EZ.txt.in out of it, then
# remove both the unpacked tree and the tarball (the glob matches both).
EZ.txt.in:
	wget http://freedesktop.org/~suzhe/sources/scim-tables-0.4.3.tar.gz
	tar zxf scim-tables-0.4.3.tar.gz
	cp scim-tables-0.4.3/zh/$@ .
	rm -rf scim-tables-0.4.3*
# Fetch the scim-chinese 0.4.2 tarball, copy data/phrase_lib.txt out of
# it, then remove both the unpacked tree and the tarball.
phrase_lib.txt:
	wget http://freedesktop.org/~suzhe/scim-chinese/scim-chinese-0.4.2.tar.gz
	tar zxf scim-chinese-0.4.2.tar.gz
	cp scim-chinese-0.4.2/data/$@ .
	rm -rf scim-chinese-0.4.2*
# Fetch the libtabe 0.2.3 tarball, copy tsi-src/tsi.src out of it, then
# remove everything matching libtabe* (unpacked tree and tarball).
tsi.src:
	wget http://unc.dl.sourceforge.net/sourceforge/libtabe/libtabe-0.2.3.tgz
	tar zxf libtabe-0.2.3.tgz
	cp libtabe/tsi-src/$@ .
	rm -rf libtabe*
# Combined word list drawn from the three phrase sources:
#   1. tsi.src, converted from Big5 to UTF-8, with "# " markers and the
#      trailing " <digit>..." part of each line removed;
#   2. phrase_lib.txt, minus its first five lines, keeping the text before
#      the tab-separated number;
#   3. EZ.txt.in, everything after the BEGIN_TABLE line, with the first 8
#      columns and anything from the first tab onwards removed.
# The result is sorted, de-duplicated and stripped of spaces via the
# scratch file "t".
wordlist: phrase_lib.txt EZ.txt.in tsi.src
	iconv -c -f big5 -t utf8 tsi.src | \
		sed 's/# //g' | \
		sed 's/[ ][0-9].*//' > $@
	sed 's/\(.*\)\t[0-9][0-9]*.*/\1/' $< | sed '1,5d' >> $@
	sed '1,/BEGIN_TABLE/d' EZ.txt.in | \
		colrm 1 8 | \
		sed 's/\t.*//' | \
		grep "^...*" >> $@
	sort $@ | uniq | sed 's/ //g' > t
	mv t $@
# Build the printutf8 helper used by the unihan.*.t rules.
# The compiler comes from $(CC) (with optional $(CFLAGS)) instead of a
# hard-coded gcc, so it can be overridden, e.g. `make CC=clang`.
printutf8: printutf8.c
	$(CC) $(CFLAGS) -o $@ $<
# Extract the kSimplifiedVariant records from Unihan.txt, drop lines that
# contain '#', strip the field name, and pipe the rest through ./printutf8.
unihan.t2s.t: Unihan.txt printutf8
	grep kSimplifiedVariant $< | \
		sed '/#/d' | \
		sed 's/kSimplifiedVariant//' | \
		./printutf8 > $@
# Merge the manual traditional->simplified table with the Unihan-derived
# one. For every entry of trad2simp.manual (its first 10 columns are used
# as a line prefix) the matching lines are deleted from a working copy of
# unihan.t2s.t; the manual table is then placed in front of what is left.
# Uses the scratch files tmp1 and tmp2.
trad2simp.t: trad2simp.manual unihan.t2s.t
	cp unihan.t2s.t tmp1
	for I in $$(colrm 11 < $<) ; do \
		sed "/^$$I/d" tmp1 > tmp2; \
		mv tmp2 tmp1; \
	done
	cat $< tmp1 > $@
# Extract the kTraditionalVariant records from Unihan.txt, drop lines that
# contain '#', strip the field name, and pipe the rest through ./printutf8.
unihan.s2t.t: Unihan.txt printutf8
	grep kTraditionalVariant $< | \
		sed '/#/d' | \
		sed 's/kTraditionalVariant//' | \
		./printutf8 > $@
# Merge the manual simplified->traditional table with the Unihan-derived
# one. For every entry of simp2trad.manual (its first 10 columns are used
# as a line prefix) the matching lines are deleted from a working copy of
# unihan.s2t.t; the manual table is then placed in front of what is left.
# Uses the scratch files tmp1 and tmp2.
simp2trad.t: unihan.s2t.t simp2trad.manual
	cp $< tmp1
	for I in $$(colrm 11 < simp2trad.manual) ; do \
		sed "/^$$I/d" tmp1 > tmp2; \
		mv tmp2 tmp1; \
	done
	cat simp2trad.manual tmp1 > $@
# One-to-many traditional->simplified entries: lines of trad2simp.t that
# are at least 19 characters long, rewritten into `"key"=>"values",` form
# by dropping every U+..... code-point label.
t2s_1tomany.t: trad2simp.t
	grep -s ".\{19,\}" $< | \
		sed 's/U+...../"/' | \
		sed 's/|U+...../"=>"/' | \
		sed 's/|U+.....//g' | \
		sed 's/|/",/' > $@
# One-to-one traditional->simplified pairs.
#  * Lines of trad2simp.t with three or more '|' separators are dropped;
#    the remaining lines are rewritten into `"X"=>"Y",` form.
#  * For every s2t_1tomany.t entry whose value has 2, 3 or 4 characters,
#    each value character except the first is emitted as a reversed pair
#    `"value-char"=>"key",`.
# The result is sorted and de-duplicated via the scratch file "t".
t2s_1to1.t: trad2simp.t s2t_1tomany.t
	sed "/.*|.*|.*|.*/d" $< | \
		sed 's/U+[0-9a-z][0-9a-z]*/"/' | \
		sed 's/|U+[0-9a-z][0-9a-z]*/"=>"/' | \
		sed 's/|/",/' > $@
	grep '"."=>"..",' s2t_1tomany.t | \
		sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' s2t_1tomany.t | \
		sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' s2t_1tomany.t | \
		sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' s2t_1tomany.t | \
		sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' s2t_1tomany.t | \
		sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' s2t_1tomany.t | \
		sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> $@
	sort $@ | uniq > t
	mv t $@
# One-to-many simplified->traditional entries: lines of simp2trad.t that
# are at least 19 characters long, rewritten into `"key"=>"values",` form
# by dropping every U+..... code-point label.
s2t_1tomany.t: simp2trad.t
	grep -s ".\{19,\}" $< | \
		sed 's/U+...../"/' | \
		sed 's/|U+...../"=>"/' | \
		sed 's/|U+.....//g' | \
		sed 's/|/",/' > $@
# One-to-one simplified->traditional pairs.
#  * Lines of simp2trad.t with three or more '|' separators are dropped;
#    the remaining lines are rewritten into `"X"=>"Y",` form.
#  * For every t2s_1tomany.t entry whose value has 2, 3 or 4 characters,
#    each value character except the first is emitted as a reversed pair
#    `"value-char"=>"key",`.
# The result is sorted and de-duplicated via the scratch file "t".
s2t_1to1.t: simp2trad.t t2s_1tomany.t
	sed "/.*|.*|.*|.*/d" $< | \
		sed 's/U+[0-9a-z][0-9a-z]*/"/' | \
		sed 's/|U+[0-9a-z][0-9a-z]*/"=>"/' | \
		sed 's/|/",/' > $@
	grep '"."=>"..",' t2s_1tomany.t | \
		sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' t2s_1tomany.t | \
		sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' t2s_1tomany.t | \
		sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' t2s_1tomany.t | \
		sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' t2s_1tomany.t | \
		sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' t2s_1tomany.t | \
		sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> $@
	sort $@ | uniq > t
	mv t $@
# Candidate traditional phrases from two sources, collected in the scratch
# file "t" and then sorted and de-duplicated:
#   * EZ.txt.in: first 8 columns and tabs removed, lines starting with 2-4
#     characters followed by a digit kept, digits stripped;
#   * tsi.src: converted from Big5 to UTF-8, trailing " <digit>..." part,
#     '#' and spaces removed, lines with at least 2 characters kept.
tphrase.t: EZ.txt.in tsi.src
	colrm 1 8 < $< | \
		sed 's/\t//g' | \
		grep "^.\{2,4\}[0-9]" | \
		sed 's/[0-9]//g' > t
	iconv -c -f big5 -t utf8 tsi.src | \
		sed 's/ [0-9].*//g' | \
		sed 's/[# ]//g' | \
		grep "^.\{2,4\}" >> t
	sort t | uniq > $@
# Every phrase of tphrase.t that contains one of the characters taken from
# the value side of s2t_1tomany.t (the first value character is skipped by
# the first sed). The trailing `|| true` keeps a final non-matching grep
# from failing the recipe.
alltradphrases.t: tphrase.t s2t_1tomany.t
	for i in $$(sed 's/.*=>".//' s2t_1tomany.t | sed 's/"//g' | sed 's/,/\n/' | sed 's/\(.\)/\1\n/g' | sort | uniq); do \
		grep -s $$i $< ; \
	done > $@ || true
# Two-character phrases of alltradphrases.t, sorted and de-duplicated.
tradphrases_2.t: alltradphrases.t
	grep "^..$$" $< | sort | uniq > $@
# Three-character phrases of alltradphrases.t, minus those that contain
# any two-character phrase from tradphrases_2.t.
#
# tradphrases_2.t is read by the recipe, so it is listed as a prerequisite:
# without it, `make tradphrases_3.t` on its own, or a parallel build, could
# run before that file exists or is up to date.
#
# t3 collects the three-character phrases matched by some two-character
# phrase; diff then keeps only the lines present in the full list but
# absent from t3. Scratch files "t" and "t3" are shared with other rules.
tradphrases_3.t: alltradphrases.t tradphrases_2.t
	cat alltradphrases.t | grep "^...$$" | sort | uniq > tradphrases_3.t
	for i in `cat tradphrases_2.t`; do grep $$i tradphrases_3.t ; done | sort | uniq > t3 || true
	diff t3 tradphrases_3.t | grep ">" | sed 's/> //' > t
	mv t tradphrases_3.t
# Four-character phrases of alltradphrases.t, minus those that contain any
# phrase from tradphrases_2.t, and then minus those that contain any
# phrase from tradphrases_3.t.
#
# Both shorter-phrase files are read by the recipe, so they are listed as
# prerequisites: without them, `make tradphrases_4.t` on its own, or a
# parallel build, could run before they exist or are up to date.
#
# Each pass writes the matched phrases to t3 and keeps only the lines of
# the current list that are absent from t3. Scratch files "t" and "t3" are
# shared with other rules.
tradphrases_4.t: alltradphrases.t tradphrases_2.t tradphrases_3.t
	cat alltradphrases.t | grep "^....$$" | sort | uniq > tradphrases_4.t
	for i in `cat tradphrases_2.t`; do grep $$i tradphrases_4.t ; done | sort | uniq > t3 || true
	diff t3 tradphrases_4.t | grep ">" | sed 's/> //' > t
	mv t tradphrases_4.t
	for i in `cat tradphrases_3.t`; do grep $$i tradphrases_4.t ; done | sort | uniq > t3 || true
	diff t3 tradphrases_4.t | grep ">" | sed 's/> //' > t
	mv t tradphrases_4.t
# Final traditional phrase list: the manual phrases plus the 2-, 3- and
# 4-character lists, sorted and de-duplicated. Lines that grep matches
# against a key character of t2s_1tomany.t (the character after the
# opening quote of each entry) are then filtered out via diff, keeping the
# '<' side. Uses the scratch file "t".
tradphrases.t: tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
	cat $< tradphrases_2.t tradphrases_3.t tradphrases_4.t | sort | uniq > $@
	for i in $$(sed 's/"\(.\).*/\1/' t2s_1tomany.t) ; do \
		grep $$i $@ ; \
	done | diff $@ - | grep '<' | sed 's/< //' > t
	mv t $@
# Companion to tradphrases.t: starting from the combined 2-, 3- and
# 4-character lists (no manual phrases), keep the '>' side of the diff
# against the lines matched by a key character of t2s_1tomany.t.
# The combined list is left behind in the scratch file "t".
tradphrases.notsure: tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
	cat $< tradphrases_3.t tradphrases_4.t | sort | uniq > t
	for i in $$(sed 's/"\(.\).*/\1/' t2s_1tomany.t) ; do \
		grep $$i t; \
	done | diff t - | grep '>' | sed 's/> //' > $@
# Candidate simplified phrases: phrase_lib.txt with tabs, digits and ASCII
# letters removed, keeping only lines that are 2 to 4 characters long.
ph.t: phrase_lib.txt
	sed 's/[\t0-9a-zA-Z]//g' $< | grep "^.\{2,4\}$$" > $@
# Every phrase of ph.t that contains one of the characters taken from the
# value side of t2s_1tomany.t (the first value character is skipped by the
# first sed).
#
# Two fixes relative to the earlier form of this rule:
#  * t2s_1tomany.t is read by the recipe, so it is now a prerequisite;
#    previously the rule could run before that file existed or after it
#    had changed without triggering a rebuild.
#  * The loop's exit status is that of the last grep, so a final character
#    with no match made the recipe fail. The loop now redirects once into
#    the target and ends with `|| true`, the same shape as the
#    alltradphrases.t rule. This also guarantees the target is created
#    when nothing matches.
allsimpphrases.t: ph.t t2s_1tomany.t
	for i in `cat t2s_1tomany.t | sed 's/.*=>".//' | sed 's/"//g' | sed 's/,/\n/' | sed 's/\(.\)/\1\n/g' | sort | uniq `; do grep $$i ph.t ; done > allsimpphrases.t || true
# Two-character phrases of allsimpphrases.t, sorted and de-duplicated.
simpphrases_2.t: allsimpphrases.t
	grep "^..$$" $< | sort | uniq > $@
# Three-character phrases of allsimpphrases.t, minus those that contain
# any two-character phrase from simpphrases_2.t.
#
# simpphrases_2.t is read by the recipe, so it is listed as a prerequisite:
# without it, `make simpphrases_3.t` on its own, or a parallel build, could
# run before that file exists or is up to date.
#
# t3 collects the three-character phrases matched by some two-character
# phrase; diff then keeps only the lines present in the full list but
# absent from t3. Scratch files "t" and "t3" are shared with other rules.
simpphrases_3.t: allsimpphrases.t simpphrases_2.t
	cat allsimpphrases.t | grep "^...$$" | sort | uniq > simpphrases_3.t
	for i in `cat simpphrases_2.t`; do grep $$i simpphrases_3.t ; done | sort | uniq > t3 || true
	diff t3 simpphrases_3.t | grep ">" | sed 's/> //' > t
	mv t simpphrases_3.t
# Four-character phrases of allsimpphrases.t, minus those that contain any
# phrase from simpphrases_2.t, and then minus those that contain any
# phrase from simpphrases_3.t.
#
# Both shorter-phrase files are read by the recipe, so they are listed as
# prerequisites: without them, `make simpphrases_4.t` on its own, or a
# parallel build, could run before they exist or are up to date.
#
# The first pass accumulates matches in "t" (cleared first with rm -f)
# before sorting them into t3; the second pass pipes straight into t3.
# Each pass keeps only the lines of the current list that are absent from
# t3. Scratch files "t" and "t3" are shared with other rules.
simpphrases_4.t: allsimpphrases.t simpphrases_2.t simpphrases_3.t
	cat allsimpphrases.t | grep "^....$$" | sort | uniq > simpphrases_4.t
	rm -f t
	for i in `cat simpphrases_2.t`; do grep $$i simpphrases_4.t >> t; done || true
	sort t | uniq > t3
	diff t3 simpphrases_4.t | grep ">" | sed 's/> //' > t
	mv t simpphrases_4.t
	for i in `cat simpphrases_3.t`; do grep $$i simpphrases_4.t; done | sort | uniq > t3 || true
	diff t3 simpphrases_4.t | grep ">" | sed 's/> //' > t
	mv t simpphrases_4.t
# Final simplified phrase list: the 2-, 3- and 4-character lists
# concatenated (not re-sorted). Lines that grep matches against a key
# character of t2s_1tomany.t (the character after the opening quote of
# each entry) are then filtered out via diff, keeping the '<' side.
# Uses the scratch file "t".
simpphrases.t: simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
	cat $< simpphrases_3.t simpphrases_4.t > $@
	for i in $$(sed 's/"\(.\).*/\1/' t2s_1tomany.t) ; do \
		grep $$i $@ ; \
	done | diff $@ - | grep '<' | sed 's/< //' > t
	mv t $@
# Companion to simpphrases.t: starting from the concatenated 2-, 3- and
# 4-character lists, keep the '>' side of the diff against the lines
# matched by a key character of t2s_1tomany.t.
# The concatenated list is left behind in the scratch file "t".
simpphrases.notsure: simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
	cat $< simpphrases_3.t simpphrases_4.t > t
	for i in $$(sed 's/"\(.\).*/\1/' t2s_1tomany.t) ; do \
		grep $$i t ; \
	done | diff t - | grep '>' | sed 's/> //' > $@
# Full traditional->simplified character table: each one-to-many entry is
# cut down to its first 7 characters and closed with `",`, and the
# one-to-one pairs are appended after it.
trad2simp1to1.t: t2s_1tomany.t t2s_1to1.t
	sed 's/\(.......\).*/\1",/' $< > $@
	cat t2s_1to1.t >> $@
# Full simplified->traditional character table: each one-to-many entry is
# cut down to its first 7 characters and closed with `",`, and the
# one-to-one pairs are appended after it.
simp2trad1to1.t: s2t_1tomany.t s2t_1to1.t
	sed 's/\(.......\).*/\1",/' $< > $@
	cat s2t_1to1.t >> $@
# Generate a throw-away PHP script that loads the traditional->simplified
# character table into $trad2simp, holds the traditional phrase list in
# $str, and echoes strtr($str, $trad2simp).
# `$$` is Make's escape for a literal `$` in the generated PHP.
trad2simp.php: trad2simp1to1.t tradphrases.t
	{ printf '<?php\n$$trad2simp=array(' && \
	  cat $< && \
	  printf ');\n$$str=\n"' && \
	  cat tradphrases.t && \
	  printf '";\n$$t=strtr($$str, $$trad2simp);\necho $$t;\n?>'; \
	} > $@
# Generate a throw-away PHP script that loads the simplified->traditional
# character table into $simp2trad, holds the simplified phrase list in
# $str, and echoes strtr($str, $simp2trad).
# `$$` is Make's escape for a literal `$` in the generated PHP.
simp2trad.php: simp2trad1to1.t simpphrases.t
	{ printf '<?php\n$$simp2trad=array(' && \
	  cat $< && \
	  printf ');\n$$str=\n"' && \
	  cat simpphrases.t && \
	  printf '";\n$$t=strtr($$str, $$simp2trad);\necho $$t;\n?>'; \
	} > $@
# Simplified->traditional phrase table. The output of trad2simp.php forms
# the keys (tmp1) and the traditional phrases themselves form the values
# (tmp2); paste joins them line by line. The tab-separated pairs of
# toTW.manual are appended in `"a"=>"b",` form.
simp2trad.phrases.t: trad2simp.php tradphrases.t toTW.manual
	php -f $< | sed 's/.*/"&" => /' > tmp1
	sed 's/.*/"&",/' tradphrases.t > tmp2
	paste tmp1 tmp2 > $@
	sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' toTW.manual >> $@
# Traditional->simplified phrase table. The output of simp2trad.php forms
# the keys (tmp1) and the simplified phrases themselves form the values
# (tmp2); paste joins them line by line. The tab-separated pairs of
# toCN.manual are appended in `"a"=>"b",` form.
trad2simp.phrases.t: simp2trad.php simpphrases.t toCN.manual
	php -f $< | sed 's/.*/"&" => /' > tmp1
	sed 's/.*/"&",/' simpphrases.t > tmp2
	paste tmp1 tmp2 > $@
	sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' toCN.manual >> $@
# Assemble the final ZhConversion.php with four PHP arrays:
#   $zh2TW - simplified->traditional characters, then phrases
#   $zh2CN - traditional->simplified characters, then phrases
#   $zh2HK - tab-separated pairs of toHK.manual
#   $zh2SG - tab-separated pairs of toSG.manual
# The separators between sections are emitted with single printf calls;
# note that the $zh2TW array is followed by one more blank line than the
# others. `$$` is Make's escape for a literal `$` in the generated PHP.
ZhConversion.php: simp2trad1to1.t simp2trad.phrases.t trad2simp1to1.t trad2simp.phrases.t toHK.manual toSG.manual
	printf '%s\n' \
		'<?php' \
		'/**' \
		' * Simplified/Traditional Chinese conversion tables' \
		' *' \
		' * Automatically generated using code and data in includes/zhtable/' \
		' * Do not modify directly! ' \
		' *' \
		' * @package MediaWiki' \
		'*/' \
		'' \
		'$$zh2TW=array(' > $@
	cat simp2trad1to1.t >> $@
	printf '\n' >> $@
	cat simp2trad.phrases.t >> $@
	printf '\n);\n\n\n$$zh2CN=array(\n' >> $@
	cat trad2simp1to1.t >> $@
	printf '\n' >> $@
	cat trad2simp.phrases.t >> $@
	printf '\n);\n\n$$zh2HK=array(\n' >> $@
	sed 's/\(.*\)\t\(.*\)/"\1" => "\2",/' toHK.manual >> $@
	printf '\n);\n\n$$zh2SG=array(\n' >> $@
	sed 's/\(.*\)\t\(.*\)/"\1" => "\2",/' toSG.manual >> $@
	printf '\n);\n?>' >> $@
# Remove the generated conversion table, the intermediate *.t files, the
# helper PHP scripts and the scratch files.
# `clean` is declared phony so a stray file named "clean" cannot disable
# it. The scratch file "t", left behind by the *.notsure rules, is now
# removed too. Files not named here (downloaded sources, wordlist, the
# *.notsure lists, the printutf8 binary) are left untouched, as before.
.PHONY: clean
clean:
	rm -f ZhConversion.php tmp1 tmp2 tmp3 t t3 *.t trad2simp.php simp2trad.php