# Creates the file ZhConversion.php used for Simplified/Traditional
# Chinese conversion. It gets the basic conversion table from the Unihan
# database, and constructs the phrase tables using phrase libraries in
# the SCIM packages. There are also special tables used for adjustment.
6 # Some data in the file simp2trad.manual was taken from the following
# Locale prefix shared by the text tools below, so that grep/sed/diff all
# run with LANG set to a UTF-8 Chinese locale.
ZH_LOCALE = LANG=zh_CN.UTF8

GREP = $(ZH_LOCALE) grep
SED  = $(ZH_LOCALE) sed
DIFF = $(ZH_LOCALE) diff

# Installation directory.
INSTDIR = /usr/local/share/zhdaemons/
# Default goal: build the PHP conversion table, the "not sure" phrase
# reports, the word list and the four per-variant dictionaries.
# `all` names no real file, so it is declared phony to keep a stray file
# called "all" from suppressing the build.
.PHONY: all
all: ZhConversion.php tradphrases.notsure simpphrases.notsure wordlist \
     toCN.dict toTW.dict toHK.dict toSG.dict
# Download the Unihan database. Unihan.txt is a prerequisite of the
# unihan.t2s.t and unihan.s2t.t rules, so the recipe needs a target header
# and must unpack the archive after fetching it.
# NOTE(review): assumes Unihan.zip contains a member named Unihan.txt - confirm.
Unihan.txt:
	wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
	unzip Unihan.zip
# Download the scim-tables source tarball and keep only zh/EZ.txt.in from it.
# The recipe copies EZ.txt.in into the current directory, so that file is the
# target; the unpacked tree and the tarball are removed afterwards.
EZ.txt.in:
	wget http://freedesktop.org/~suzhe/sources/scim-tables-0.4.3.tar.gz
	tar zxvf scim-tables-0.4.3.tar.gz > /dev/null
	cp scim-tables-0.4.3/zh/EZ.txt.in .
	rm -rf scim-tables-0.4.3*
# Download the scim-chinese source tarball and keep only data/phrase_lib.txt.
# The recipe copies phrase_lib.txt into the current directory, so that file
# is the target; the unpacked tree and the tarball are removed afterwards.
phrase_lib.txt:
	wget http://freedesktop.org/~suzhe/scim-chinese/scim-chinese-0.4.2.tar.gz
	tar zxvf scim-chinese-0.4.2.tar.gz > /dev/null
	cp scim-chinese-0.4.2/data/phrase_lib.txt .
	rm -rf scim-chinese-0.4.2*
# Download the libtabe tarball and copy tsi-src/tsi.src out of it.
# The recipe copies tsi.src into the current directory, so that file is the
# target. The unpacked libtabe/ tree and the tarball are left in place.
tsi.src:
	wget http://unc.dl.sourceforge.net/sourceforge/libtabe/libtabe-0.2.3.tgz
	tar zxvf libtabe-0.2.3.tgz > /dev/null
	cp libtabe/tsi-src/tsi.src .
# Build a merged word list from the three phrase sources.
#   1. tsi.src is converted from Big5 to UTF-8, "# " markers are dropped and
#      everything from the first " <digit>" onwards is cut.
#   2. phrase_lib.txt: the text before the tab-separated number is kept and
#      the first five lines are skipped.
#   3. EZ.txt.in: everything up to and including the BEGIN_TABLE line is
#      skipped, columns 1-8 are removed, text after the first tab is cut and
#      only lines with at least two characters are kept.
# The combined list is then sorted and de-duplicated, and spaces are removed.
# The cleaned result is moved back over wordlist; otherwise it would be left
# in the scratch file `t` and the target would keep its raw contents.
wordlist: phrase_lib.txt EZ.txt.in tsi.src
	iconv -c -f big5 -t utf8 tsi.src | $(SED) 's/# //g' | $(SED) 's/[ ][0-9].*//' > wordlist
	$(SED) 's/\(.*\)\t[0-9][0-9]*.*/\1/' phrase_lib.txt | $(SED) '1,5d' >>wordlist
	$(SED) '1,/BEGIN_TABLE/d' EZ.txt.in | colrm 1 8 | $(SED) 's/\t.*//' | $(GREP) "^...*" >> wordlist
	sort wordlist | uniq | $(SED) 's/ //g' > t
	mv t wordlist
# Compile the printutf8 helper from its single C source file.
# $@ is the target (printutf8) and $< the first prerequisite (printutf8.c).
printutf8: printutf8.c
	gcc -o $@ $<
# Extract the kSimplifiedVariant records from Unihan.txt: lines containing
# '#' are dropped, the field name is removed, and the remainder is fed through
# ./printutf8 to produce the raw traditional-to-simplified table.
unihan.t2s.t: Unihan.txt printutf8
	$(GREP) kSimplifiedVariant Unihan.txt | \
	  $(SED) -e '/#/d' -e 's/kSimplifiedVariant//' | \
	  ./printutf8 > $@
# Merge the manual traditional-to-simplified overrides with the Unihan table.
# tmp1 starts as a copy of unihan.t2s.t; without that seed the loop below
# would filter a missing or stale scratch file. For every entry in
# trad2simp.manual (keyed by its first 10 columns, via `colrm 11`) the lines
# of tmp1 starting with that key are deleted, so the manual entry wins.
# The manual table is then placed in front of what is left.
trad2simp.t: trad2simp.manual unihan.t2s.t
	cp unihan.t2s.t tmp1
	for I in `colrm 11 < trad2simp.manual` ; do $(SED) "/^$$I/d" tmp1 > tmp2; mv tmp2 tmp1; done
	cat trad2simp.manual tmp1 > trad2simp.t
# Extract the kTraditionalVariant records from Unihan.txt: lines containing
# '#' are dropped, the field name is removed, and the remainder is fed through
# ./printutf8 to produce the raw simplified-to-traditional table.
unihan.s2t.t: Unihan.txt printutf8
	$(GREP) kTraditionalVariant Unihan.txt | \
	  $(SED) -e '/#/d' -e 's/kTraditionalVariant//' | \
	  ./printutf8 > $@
# Merge the manual simplified-to-traditional overrides with the Unihan table.
# tmp1 starts as a copy of unihan.s2t.t; without that seed the loop below
# would filter a missing or stale scratch file. For every entry in
# simp2trad.manual (keyed by its first 10 columns, via `colrm 11`) the lines
# of tmp1 starting with that key are deleted, so the manual entry wins.
# The manual table is then placed in front of what is left.
simp2trad.t: unihan.s2t.t simp2trad.manual
	cp unihan.s2t.t tmp1
	for I in `colrm 11 < simp2trad.manual` ; do $(SED) "/^$$I/d" tmp1 > tmp2; mv tmp2 tmp1; done
	cat simp2trad.manual tmp1 > simp2trad.t
# One-to-many traditional-to-simplified entries: keep lines of trad2simp.t
# that are at least 19 characters long, then rewrite them step by step into
# `"X"=>"YZ...",` form (first U+ code becomes the opening quote, second
# becomes `"=>"`, remaining codes are dropped, the next '|' closes the entry).
t2s_1tomany.t: trad2simp.t
	$(GREP) -s ".\{19,\}" trad2simp.t | \
	  $(SED) -e 's/U+...../"/' \
	         -e 's/|U+...../"=>"/' \
	         -e 's/|U+.....//g' \
	         -e 's/|/",/' > $@
# One-to-one traditional-to-simplified entries.
# First, lines of trad2simp.t with fewer than three '|' separators are turned
# into `"X"=>"Y",` form. Then the one-to-many simplified-to-traditional table
# is inverted: for values of 2, 3 or 4 characters, each character after the
# first is emitted as a key mapping back to the original single-character key.
# Finally the list is sorted and de-duplicated; the result is moved back over
# the target, since otherwise it would be stranded in the scratch file `t`.
t2s_1to1.t: trad2simp.t s2t_1tomany.t
	$(SED) "/.*|.*|.*|.*/d" trad2simp.t | $(SED) 's/U+[0-9a-z][0-9a-z]*/"/' | $(SED) 's/|U+[0-9a-z][0-9a-z]*/"=>"/' | $(SED) 's/|/",/' > t2s_1to1.t
	$(GREP) '"."=>"..",' s2t_1tomany.t | $(SED) 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
	$(GREP) '"."=>"...",' s2t_1tomany.t | $(SED) 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> t2s_1to1.t
	$(GREP) '"."=>"...",' s2t_1tomany.t | $(SED) 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
	$(GREP) '"."=>"....",' s2t_1tomany.t | $(SED) 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> t2s_1to1.t
	$(GREP) '"."=>"....",' s2t_1tomany.t | $(SED) 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> t2s_1to1.t
	$(GREP) '"."=>"....",' s2t_1tomany.t | $(SED) 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
	sort t2s_1to1.t | uniq > t
	mv t t2s_1to1.t
# One-to-many simplified-to-traditional entries: keep lines of simp2trad.t
# that are at least 19 characters long, then rewrite them step by step into
# `"X"=>"YZ...",` form (first U+ code becomes the opening quote, second
# becomes `"=>"`, remaining codes are dropped, the next '|' closes the entry).
s2t_1tomany.t: simp2trad.t
	$(GREP) -s ".\{19,\}" simp2trad.t | \
	  $(SED) -e 's/U+...../"/' \
	         -e 's/|U+...../"=>"/' \
	         -e 's/|U+.....//g' \
	         -e 's/|/",/' > $@
# One-to-one simplified-to-traditional entries.
# First, lines of simp2trad.t with fewer than three '|' separators are turned
# into `"X"=>"Y",` form. Then the one-to-many traditional-to-simplified table
# is inverted: for values of 2, 3 or 4 characters, each character after the
# first is emitted as a key mapping back to the original single-character key.
# Finally the list is sorted and de-duplicated; the result is moved back over
# the target, since otherwise it would be stranded in the scratch file `t`.
s2t_1to1.t: simp2trad.t t2s_1tomany.t
	$(SED) "/.*|.*|.*|.*/d" simp2trad.t | $(SED) 's/U+[0-9a-z][0-9a-z]*/"/' | $(SED) 's/|U+[0-9a-z][0-9a-z]*/"=>"/' | $(SED) 's/|/",/' > s2t_1to1.t
	$(GREP) '"."=>"..",' t2s_1tomany.t | $(SED) 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
	$(GREP) '"."=>"...",' t2s_1tomany.t | $(SED) 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> s2t_1to1.t
	$(GREP) '"."=>"...",' t2s_1tomany.t | $(SED) 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
	$(GREP) '"."=>"....",' t2s_1tomany.t | $(SED) 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> s2t_1to1.t
	$(GREP) '"."=>"....",' t2s_1tomany.t | $(SED) 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> s2t_1to1.t
	$(GREP) '"."=>"....",' t2s_1tomany.t | $(SED) 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
	sort s2t_1to1.t | uniq > t
	mv t s2t_1to1.t
# Collect candidate traditional phrases into the scratch file `t`, then write
# the sorted, de-duplicated list to the target.
#   - EZ.txt.in: columns 1-8 and all tabs are removed; lines that start with
#     2-4 characters followed by a digit are kept, then digits are stripped.
#   - tsi.src: converted from Big5 to UTF-8, trailing " <digit>..." is cut,
#     '#' and spaces are removed; lines with at least 2 characters are kept.
tphrase.t: EZ.txt.in tsi.src
	colrm 1 8 < EZ.txt.in | \
	  $(SED) 's/\t//g' | \
	  $(GREP) "^.\{2,4\}[0-9]" | \
	  $(SED) 's/[0-9]//g' > t
	iconv -c -f big5 -t utf8 tsi.src | \
	  $(SED) -e 's/ [0-9].*//g' -e 's/[# ]//g' | \
	  $(GREP) "^.\{2,4\}" >> t
	sort t | uniq > $@
# Select every phrase in tphrase.t that contains an ambiguous traditional
# character. The backquoted pipeline takes the value side of each
# s2t_1tomany.t entry minus its first character, splits it into one character
# per line and de-duplicates; each character is then grepped in tphrase.t.
# `|| true` keeps the rule from failing when the last grep finds nothing.
alltradphrases.t: tphrase.t s2t_1tomany.t
	for i in `$(SED) 's/.*=>".//' s2t_1tomany.t | $(SED) 's/"//g' |$(SED) 's/,/\n/' | $(SED) 's/\(.\)/\1\n/g' |sort | uniq`; do \
	  $(GREP) -s $$i tphrase.t; \
	done > $@ || true
# Two-character phrases from alltradphrases.t, sorted and de-duplicated.
# ($$ is make's escape for the regex end-of-line anchor '$'.)
tradphrases_2.t: alltradphrases.t
	$(GREP) "^..$$" alltradphrases.t | sort | uniq > $@
# Three-character phrases from alltradphrases.t, minus those that contain a
# two-character phrase already listed in tradphrases_2.t.
# t3 collects the three-character phrases matching some two-character
# phrase; the diff keeps only lines present in tradphrases_3.t and absent
# from t3. The filtered list is moved back over the target (it would otherwise
# be left in the scratch file `t`). tradphrases_2.t is read by the recipe and
# is therefore listed as a prerequisite.
tradphrases_3.t: alltradphrases.t tradphrases_2.t
	cat alltradphrases.t | $(GREP) "^...$$" | sort | uniq > tradphrases_3.t
	for i in `cat tradphrases_2.t`; do $(GREP) $$i tradphrases_3.t; done | sort | uniq > t3 || true
	$(DIFF) t3 tradphrases_3.t | $(GREP) ">" | $(SED) 's/> //' > t
	mv t tradphrases_3.t
# Four-character phrases from alltradphrases.t, minus those that contain a
# phrase already listed in tradphrases_2.t or tradphrases_3.t.
# Each pass builds t3 (four-character phrases matching a shorter phrase) and
# keeps only the lines of tradphrases_4.t that are absent from t3. After each
# pass the filtered list is moved back over the target, so the second pass
# works on the output of the first instead of the unfiltered file.
# tradphrases_2.t and tradphrases_3.t are read by the recipe and are therefore
# listed as prerequisites.
tradphrases_4.t: alltradphrases.t tradphrases_2.t tradphrases_3.t
	cat alltradphrases.t | $(GREP) "^....$$" | sort | uniq > tradphrases_4.t
	for i in `cat tradphrases_2.t`; do $(GREP) $$i tradphrases_4.t; done | sort | uniq > t3 || true
	$(DIFF) t3 tradphrases_4.t | $(GREP) ">" | $(SED) 's/> //' > t
	mv t tradphrases_4.t
	for i in `cat tradphrases_3.t`; do $(GREP) $$i tradphrases_4.t; done | sort | uniq > t3 || true
	$(DIFF) t3 tradphrases_4.t | $(GREP) ">" | $(SED) 's/> //' > t
	mv t tradphrases_4.t
# Final traditional phrase list: the manual list plus the 2-, 3- and
# 4-character lists, sorted and de-duplicated.
# The loop then greps tradphrases.t for the first character after the opening
# quote of every t2s_1tomany.t entry; the diff keeps the lines of
# tradphrases.t that are absent from that loop output ('<' side).
# The filtered list is moved back over the target (it would otherwise be left
# in the scratch file `t`).
tradphrases.t: tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
	cat tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t | sort | uniq > tradphrases.t
	for i in `$(SED) 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do $(GREP) $$i tradphrases.t; done | $(DIFF) tradphrases.t - | $(GREP) '<' | $(SED) 's/< //' > t
	mv t tradphrases.t
# Report of traditional phrases needing review: the 2-, 3- and 4-character
# lists are merged into scratch file `t`, which is then grepped for the first
# character after the opening quote of every t2s_1tomany.t entry; the '>'
# side of the diff between `t` and that grep output is written to the target.
tradphrases.notsure: tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
	sort tradphrases_2.t tradphrases_3.t tradphrases_4.t | uniq > t
	for i in `$(SED) 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do \
	  $(GREP) $$i t; \
	done | $(DIFF) t - | $(GREP) '>' | $(SED) 's/> //' > $@
# Candidate simplified phrases: strip tabs, digits and ASCII letters from
# phrase_lib.txt and keep the lines that are 2 to 4 characters long.
# The recipe writes ph.t, which allsimpphrases.t depends on, so it is given
# its target header with phrase_lib.txt as the prerequisite it reads.
ph.t: phrase_lib.txt
	$(SED) 's/[\t0-9a-zA-Z]//g' phrase_lib.txt | $(GREP) "^.\{2,4\}$$" > ph.t
# Select every phrase in ph.t that contains an ambiguous simplified
# character. The backquoted pipeline takes the value side of each
# t2s_1tomany.t entry minus its first character, splits it into one character
# per line and de-duplicates; each character is then grepped in ph.t.
# t2s_1tomany.t is read by the recipe and is therefore listed as a
# prerequisite. `|| true` keeps the rule from failing when the last grep finds
# no match, matching the alltradphrases.t rule.
allsimpphrases.t: ph.t t2s_1tomany.t
	rm -f allsimpphrases.t
	for i in `cat t2s_1tomany.t | $(SED) 's/.*=>".//' | $(SED) 's/"//g' | $(SED) 's/,/\n/' | $(SED) 's/\(.\)/\1\n/g' | sort | uniq `; do $(GREP) $$i ph.t >> allsimpphrases.t; done || true
# Two-character phrases from allsimpphrases.t, sorted and de-duplicated.
# ($$ is make's escape for the regex end-of-line anchor '$'.)
simpphrases_2.t: allsimpphrases.t
	$(GREP) "^..$$" allsimpphrases.t | sort | uniq > $@
# Three-character phrases from allsimpphrases.t, minus those that contain a
# two-character phrase already listed in simpphrases_2.t.
# t3 collects the three-character phrases matching some two-character
# phrase; the diff keeps only lines present in simpphrases_3.t and absent
# from t3. The filtered list is moved back over the target (it would otherwise
# be left in the scratch file `t`). simpphrases_2.t is read by the recipe and
# is therefore listed as a prerequisite.
simpphrases_3.t: allsimpphrases.t simpphrases_2.t
	cat allsimpphrases.t | $(GREP) "^...$$" | sort | uniq > simpphrases_3.t
	for i in `cat simpphrases_2.t`; do $(GREP) $$i simpphrases_3.t; done | sort | uniq > t3 || true
	$(DIFF) t3 simpphrases_3.t | $(GREP) ">" | $(SED) 's/> //' > t
	mv t simpphrases_3.t
# Four-character phrases from allsimpphrases.t, minus those that contain a
# phrase already listed in simpphrases_2.t or simpphrases_3.t.
# Each pass builds t3 (four-character phrases matching a shorter phrase) and
# keeps only the lines of simpphrases_4.t that are absent from t3.
# The first pass pipes the loop output straight into t3, as the second pass
# and the tradphrases_4.t rule do, rather than appending to a scratch file
# `t` that may hold stale data and then diffing against an outdated t3.
# After each pass the filtered list is moved back over the target.
# simpphrases_2.t and simpphrases_3.t are read by the recipe and are therefore
# listed as prerequisites.
simpphrases_4.t: allsimpphrases.t simpphrases_2.t simpphrases_3.t
	cat allsimpphrases.t | $(GREP) "^....$$" | sort | uniq > simpphrases_4.t
	for i in `cat simpphrases_2.t`; do $(GREP) $$i simpphrases_4.t; done | sort | uniq > t3 || true
	$(DIFF) t3 simpphrases_4.t | $(GREP) ">" | $(SED) 's/> //' > t
	mv t simpphrases_4.t
	for i in `cat simpphrases_3.t`; do $(GREP) $$i simpphrases_4.t; done | sort | uniq > t3 || true
	$(DIFF) t3 simpphrases_4.t | $(GREP) ">" | $(SED) 's/> //' > t
	mv t simpphrases_4.t
# Final simplified phrase list: the 2-, 3- and 4-character lists concatenated.
# The loop then greps simpphrases.t for the first character after the opening
# quote of every t2s_1tomany.t entry; the diff keeps the lines of
# simpphrases.t that are absent from that loop output ('<' side).
# The filtered list is moved back over the target (it would otherwise be left
# in the scratch file `t`).
simpphrases.t: simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
	cat simpphrases_2.t simpphrases_3.t simpphrases_4.t > simpphrases.t
	for i in `$(SED) 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do $(GREP) $$i simpphrases.t; done | $(DIFF) simpphrases.t - | $(GREP) '<' | $(SED) 's/< //' > t
	mv t simpphrases.t
# Report of simplified phrases needing review: the 2-, 3- and 4-character
# lists are concatenated into scratch file `t`, which is then grepped for the
# first character after the opening quote of every t2s_1tomany.t entry; the
# '>' side of the diff between `t` and that grep output is written to the
# target.
simpphrases.notsure: simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
	cat simpphrases_2.t simpphrases_3.t simpphrases_4.t > t
	for i in `$(SED) 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do \
	  $(GREP) $$i t; \
	done | $(DIFF) t - | $(GREP) '>' | $(SED) 's/> //' > $@
# Character-level traditional-to-simplified table: each one-to-many entry is
# cut down to its first seven characters plus `",`, and the one-to-one
# entries are appended after them.
trad2simp1to1.t: t2s_1tomany.t t2s_1to1.t
	$(SED) 's/\(.......\).*/\1",/' t2s_1tomany.t > $@
	cat t2s_1to1.t >> $@
# Character-level simplified-to-traditional table: each one-to-many entry is
# cut down to its first seven characters plus `",`, and the one-to-one
# entries are appended after them.
simp2trad1to1.t: s2t_1tomany.t s2t_1to1.t
	$(SED) 's/\(.......\).*/\1",/' s2t_1tomany.t > $@
	cat s2t_1to1.t >> $@
# Generate a throwaway PHP script that puts the character table in
# $trad2simp, puts the traditional phrase list in $str, and echoes
# strtr($str, $trad2simp). ($$ is make's escape for a literal '$'.)
trad2simp.php: trad2simp1to1.t tradphrases.t
	printf '<?php\n$$trad2simp=array(' > $@
	cat trad2simp1to1.t >> $@
	printf ');\n$$str=\n"' >> $@
	cat tradphrases.t >> $@
	printf '";\n$$t=strtr($$str, $$trad2simp);\necho $$t;\n?>' >> $@
# Generate a throwaway PHP script that puts the character table in
# $simp2trad, puts the simplified phrase list in $str, and echoes
# strtr($str, $simp2trad). ($$ is make's escape for a literal '$'.)
simp2trad.php: simp2trad1to1.t simpphrases.t
	printf '<?php\n$$simp2trad=array(' > $@
	cat simp2trad1to1.t >> $@
	printf ');\n$$str=\n"' >> $@
	cat simpphrases.t >> $@
	printf '";\n$$t=strtr($$str, $$simp2trad);\necho $$t;\n?>' >> $@
# Phrase-level simplified-to-traditional table.
# tmp1 holds the output of trad2simp.php, one line at a time, as `"..." => `;
# tmp2 holds the original traditional phrases as `"...",`. `paste` joins them
# line by line. The tab-separated pairs in toTW.manual are appended in
# `"a"=>"b",` form.
simp2trad.phrases.t: trad2simp.php tradphrases.t toTW.manual
	php -f trad2simp.php | $(SED) 's/\(.*\)/"\1" => /' > tmp1
	$(SED) 's/\(.*\)/"\1",/' tradphrases.t > tmp2
	paste tmp1 tmp2 > $@
	$(SED) 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' toTW.manual >> $@
# Phrase-level traditional-to-simplified table.
# tmp1 holds the output of simp2trad.php, one line at a time, as `"..." => `;
# tmp2 holds the original simplified phrases as `"...",`. `paste` joins them
# line by line. The tab-separated pairs in toCN.manual are appended in
# `"a"=>"b",` form.
trad2simp.phrases.t: simp2trad.php simpphrases.t toCN.manual
	php -f simp2trad.php | $(SED) 's/\(.*\)/"\1" => /' > tmp1
	$(SED) 's/\(.*\)/"\1",/' simpphrases.t > tmp2
	paste tmp1 tmp2 > $@
	$(SED) 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' toCN.manual >> $@
# Dictionary form of the to-simplified tables: commas, spaces and tabs are
# removed from every entry and the first `=>` becomes a tab. The character
# table comes first, followed by the phrase table.
toCN.dict: trad2simp1to1.t trad2simp.phrases.t
	$(SED) -e 's/[, \t]//g' -e 's/=>/\t/' trad2simp1to1.t > $@
	$(SED) -e 's/[, \t]//g' -e 's/=>/\t/' trad2simp.phrases.t >> $@
# Dictionary form of the to-traditional tables: commas, spaces and tabs are
# removed from every entry and the first `=>` becomes a tab. The character
# table comes first, followed by the phrase table.
toTW.dict: simp2trad1to1.t simp2trad.phrases.t
	$(SED) -e 's/[, \t]//g' -e 's/=>/\t/' simp2trad1to1.t > $@
	$(SED) -e 's/[, \t]//g' -e 's/=>/\t/' simp2trad.phrases.t >> $@
# Dictionary form of the manual Hong Kong table: the first expression strips
# the characters in its bracket set from toHK.manual, the second quotes the
# two whitespace-separated fields and joins them with a tab.
toHK.dict: toHK.manual
	$(SED) -e 's/[ ]//g' \
	       -e 's/\(^[^ \t]*\)[ \t][ \t]*\([^ \t]*\)/"\1"\t"\2"/' toHK.manual > $@
# Dictionary form of the manual Singapore table: the first expression strips
# the characters in its bracket set from toSG.manual, the second quotes the
# two whitespace-separated fields and joins them with a tab.
toSG.dict: toSG.manual
	$(SED) -e 's/[ ]//g' \
	       -e 's/\(^[^ \t]*\)[ \t][ \t]*\([^ \t]*\)/"\1"\t"\2"/' toSG.manual > $@
# Assemble the final PHP file. It consists of a generated-file header and four
# arrays: $zh2TW and $zh2CN (character table followed by phrase table),
# and $zh2HK and $zh2SG (tab-separated manual tables converted to
# `"a" => "b",` entries), closed by `?>`. $@ is the target file name and
# $$ is make's escape for a literal '$'.
ZhConversion.php: simp2trad1to1.t simp2trad.phrases.t trad2simp1to1.t trad2simp.phrases.t toHK.manual toSG.manual
# File header (the first write truncates the target).
	printf '<?php\n/**\n * Simplified/Traditional Chinese conversion tables\n' > $@
	printf ' *\n * Automatically generated using code and data in includes/zhtable/\n' >> $@
	printf ' * Do not modify directly! \n *\n * @package MediaWiki\n*/\n\n' >> $@
# $zh2TW: simplified -> traditional.
	printf '$$zh2TW=array(\n' >> $@
	cat simp2trad1to1.t >> $@
	echo >> $@
	cat simp2trad.phrases.t >> $@
	echo >> $@
	echo ');' >> $@
	echo >> $@
	echo >> $@
# $zh2CN: traditional -> simplified.
	printf '$$zh2CN=array(\n' >> $@
	cat trad2simp1to1.t >> $@
	echo >> $@
	cat trad2simp.phrases.t >> $@
	echo >> $@
	printf ');' >> $@
	echo >> $@
	echo >> $@
# $zh2HK: manual Hong Kong table.
	printf '$$zh2HK=array(\n' >> $@
	$(SED) 's/\(.*\)\t\(.*\)/"\1" => "\2",/' toHK.manual >> $@
	echo >> $@
	printf ');' >> $@
	echo >> $@
	echo >> $@
# $zh2SG: manual Singapore table.
	printf '$$zh2SG=array(\n' >> $@
	$(SED) 's/\(.*\)\t\(.*\)/"\1" => "\2",/' toSG.manual >> $@
	echo >> $@
	printf ');' >> $@
	echo >> $@
	printf '?>' >> $@
# Remove generated files and scratch files. The recipe is given its `clean`
# target header, and the target is declared phony because it names no real
# file.
# NOTE(review): wordlist, *.notsure, the scratch file `t` and the downloaded
# sources are not removed here - confirm whether that is intentional.
.PHONY: clean
clean:
	rm -f ZhConversion.php tmp1 tmp2 tmp3 t3 *.t trad2simp.php simp2trad.php *.dict printutf8 *~