2 # Create_JS_wordlists.praat
4 # Praat script converting wordlist files into JavaScript code
6 # Copyright (C) 2016 R.J.J.H. van Son and the Netherlands Cancer Institute
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
# Script entry point.
# The form asks for the directory holding the wordlist tables and for the
# JavaScript file to generate; the answers are read below as
# source_directory$ and target_file$. Text output is then switched to UTF-8
# and the conversion procedure tables2javascript is called.
# NOTE(review): no endform line is visible in this listing - confirm against
# the full script.
23 form Source directory and target file
24 sentence Source_directory ../wordlists
25 sentence Target_file wordlists_plus.js
28 Text writing preferences... UTF-8
# The source directory is passed inside double quotes, presumably so that a
# path containing spaces stays a single argument - TODO confirm.
30 call tables2javascript "'source_directory$'" 'target_file$'
# tables2javascript .sourceDir$ .targetFile$
#
# Writes .targetFile$ from scratch as a JavaScript source file that defines
# "var wordlists = [ ... ]", with one entry per table file found under
# .sourceDir$ (the list of files is built by createListOfTables and each
# table is serialised by table2objectlist).
#
# .sourceDir$   directory that is searched for table files
# .targetFile$  path of the JavaScript file to (re)create
#
# Side effects: deletes any existing .targetFile$, creates a Table named
# after the base name of .targetFile$ and a Table called ListOfTables, and
# reads every listed table file into the object list.
# Exits with "Directory not found" (last line) - presumably in the branch
# taken when .sourceDir$ is not readable.
#
# NOTE(review): this listing shows no else/endif/endfor/endproc lines, no
# statement that appends a row before "Set string value...", and no Remove
# after the final selections; confirm against the full script before
# touching the control flow.
33 procedure tables2javascript .sourceDir$ .targetFile$
34 # Iterate over all tables in the directory
35 if fileReadable(.sourceDir$)
36 deleteFile(.targetFile$)
# Header comment of the generated file. The file name is taken from this
# procedure's own argument (not from the global form variable), so the
# header is correct for every caller.
38 fileappend '.targetFile$' /* '.targetFile$''newline$'
39 fileappend '.targetFile$' * 'newline$'
40 fileappend '.targetFile$' * Tables and this code are licensed under the GNU GPL version 2'newline$'
41 fileappend '.targetFile$' * or later.'newline$'
42 fileappend '.targetFile$' * 'newline$'
43 fileappend '.targetFile$' */ 'newline$'
44 fileappend '.targetFile$' 'newline$'
46 fileappend '.targetFile$' var wordlists = ['newline$'
# Base name of the target file: the text between the last "/" and the last ".".
48 .nameStart = rindex(.targetFile$, "/")+1
49 .nameEnd = rindex(.targetFile$, ".") -.nameStart
50 .newTableNameList$ = mid$(.targetFile$, .nameStart, .nameEnd)
51 Create Table with column names... '.newTableNameList$' 0 Name
53 # Create a list of Tables with Paths
54 Create Table with column names... ListOfTables 0 Name Directory
56 call createListOfTables ListOfTables '.sourceDir$'
58 select Table ListOfTables
59 .numOfTables = Get number of rows
60 for .i to .numOfTables
61 select Table ListOfTables
62 .table$ = Get value... '.i' Name
63 .tableDir$ = Get value... '.i' Directory
65 Read from file... '.tableDir$'/'.table$'
66 .tableName$ = selected$("Table")
# Tables with the generic names "wordlist" or "table" are renamed after the
# last component of the directory they were read from.
67 if .tableName$ = "wordlist" or .tableName$ = "table"
68 .nameStart = rindex(.tableDir$, "/")+1
69 .nameEnd = length(.tableDir$)+1 -.nameStart
70 .newTableName$ = mid$(.tableDir$, .nameStart, .nameEnd)
71 select Table '.tableName$'
72 Rename... '.newTableName$'
73 .tableName$ = selected$("Table")
# Record the (possibly renamed) table name in the name-list table.
76 select Table '.newTableNameList$'
78 .currentTableNum = Get number of rows
79 Set string value... '.currentTableNum' Name '.tableName$'
# Serialise the table, then close its entry; the "," is presumably written
# only between entries - TODO confirm (the condition is not visible here).
82 call table2objectlist '.tableName$' '.targetFile$'
83 fileappend '.targetFile$' 'tab$''tab$']
85 fileappend '.targetFile$' ,
87 fileappend '.targetFile$' 'newline$'
90 select Table '.tableName$'
# Close the wordlists array.
95 fileappend '.targetFile$' 'tab$']'newline$''newline$'
97 select Table '.newTableNameList$'
98 plus Table ListOfTables
101 exit Directory not found: '.sourceDir$'
105 # Convert a single table to a JavaScript array literal appended to the target file
# table2objectlist .tableName$ .targetFile$
#
# Appends the Table object named .tableName$ to .targetFile$ as nested
# JavaScript array text: an opening "[", the table name as a quoted string
# (underscores replaced by spaces), and then one bracketed row per table row
# holding quoted cell values.
#
# .tableName$   name of a Table object that is already in the object list
# .targetFile$  file that the text is appended to
#
# For the "Pinyin" column the raw value is written first and the tone-marked
# form produced by numbers2pinyin is written right after it.
#
# NOTE(review): the loop over .col, the assignment of the global separator$,
# whatever uses .colIDX, and all endif/endfor/endproc lines are not visible
# in this listing - confirm against the full script.
# NOTE(review): cell values are written between double quotes as they are;
# no escaping of quotes or backslashes is visible here.
106 procedure table2objectlist .tableName$ .targetFile$
107 select Table '.tableName$'
110 # Collect information
111 .numberOfColumns = Get number of columns
112 .numberOfRows = Get number of rows
114 # Set name of procedure as variable
# The name written to the file is the table name with "_" turned into " ".
# NOTE(review): the statement below ends in a stray ";".
115 .tableVariableName$ = replace_regex$(.tableName$, "_", " ", 0);
118 fileappend '.targetFile$' 'tab$''tab$'['newline$'
119 fileappend '.targetFile$' 'tab$''tab$'"'.tableVariableName$'", ['newline$'
120 # Create table with columns
# Column labels that are looked up in the table, in output order.
123 .labelList$[1] = "Pinyin"
124 .labelList$[2] = "Character"
125 .labelList$[3] = "Translation"
126 .labelList$[4] = "Lesson"
127 .labelList$[5] = "Sound"
129 for .row to .numberOfRows
130 fileappend '.targetFile$' 'tab$''tab$''tab$'[
133 .label$ = .labelList$[.col]
# Presumably .colIDX is used to skip labels the table does not have - TODO confirm.
134 .colIDX = Get column index: .label$
136 .value$ = Get value... '.row' '.label$'
143 fileappend '.targetFile$' 'separator$'"'.value$'"
145 if .label$ = "Pinyin"
146 call numbers2pinyin '.value$'
147 fileappend '.targetFile$' 'separator$'"'numbers2pinyin.pinyin$'"
150 fileappend '.targetFile$' ]
# A comma follows every row except the last one.
151 if .row < .numberOfRows
152 fileappend '.targetFile$' ,
154 fileappend '.targetFile$' 'newline$'
156 fileappend '.targetFile$' 'tab$''tab$''tab$']'newline$'
160 # .listName$ is name of table to receive all file names
161 # Labels are Name and Directory
162 # Who says you cannot do recursion in Praat?
163 # This is eerily fragile code.
# createListOfTables .listName$ .topDirectory$
#
# Fills the Table named .listName$ (columns Name and Directory) with the
# *.Table files found in .topDirectory$, then calls itself for every entry
# of the directory listing of .topDirectory$.
#
# .listName$      name of an existing Table with columns Name and Directory
# .topDirectory$  directory to scan
#
# Praat procedure variables are not private to one invocation, so the
# values that must survive a recursive call are stored in variables whose
# names contain the current value of the global variable recursion.
#
# NOTE(review): recursion is not assigned anywhere in the visible lines, and
# no Append row, Remove, endfor or endproc lines are visible either -
# confirm against the full script before changing anything here.
165 procedure createListOfTables .listName$ .topDirectory$
# Keep the arguments in per-level variables.
167 .listName'recursion'$ = .listName$
168 .topDirectory'recursion'$ = .topDirectory$
170 .currentTopDirectory$ = .topDirectory'recursion'$
171 Create Strings as file list... Files '.currentTopDirectory$'/*.Table
172 .numOfFiles'recursion' = Get number of strings
173 for .i to .numOfFiles'recursion'
175 .table'recursion'$ = Get string... '.i'
177 .currentListName$ = .listName'recursion'$
178 select Table '.currentListName$'
# Store file name and directory in row .numRows of the list table.
180 .numRows = Get number of rows
181 .currentTable$ = .table'recursion'$
182 .currentTopDirectory$ = .topDirectory'recursion'$
183 Set string value... '.numRows' Name '.currentTable$'
184 Set string value... '.numRows' Directory '.currentTopDirectory$'
188 # Recurse into directories
189 .currentTopDirectory$ = .topDirectory'recursion'$
190 Create Strings as directory list... Directories '.currentTopDirectory$'
191 .numOfDirectories'recursion' = Get number of strings
192 for .i'recursion' to .numOfDirectories'recursion'
193 select Strings Directories
194 .currentI = .i'recursion'
195 .directory'recursion'$ = Get string... '.currentI'
196 .currentTopDirectory$ = .topDirectory'recursion'$
197 .currentDirectory$ = .directory'recursion'$
# Recursive call on the subdirectory, with the same list table.
198 call createListOfTables '.listName$' '.currentTopDirectory$'/'.currentDirectory$'
200 select Strings Directories
# numbers2pinyin .numberstext$
#
# Converts pinyin written with tone digits (for example ni3hao3, with v
# standing for u-umlaut) into pinyin with tone diacritics.
#
# .numberstext$  pinyin text with a tone digit 0-4 after each syllable
# Result: numbers2pinyin.pinyin$ holds the converted text.
#
# Steps: mark syllable boundaries between vowels, move each tone digit next
# to the vowel that carries the mark, replace vowel+digit pairs by the
# accented vowel, and finally drop the neutral tone digit 0.
205 procedure numbers2pinyin .numberstext$
206 .intermediatePinyin$ = .numberstext$
207 # Add an apostrophe after a tone digit that is directly followed by a vowel (syllable boundary)
208 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([aeuiov])([0-9])([aeuiov])", "\1\2'\3", 0)
209 # Move numbers to the nucleus vowel
# First pull the digit over any trailing consonants, to just after the last vowel.
211 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([aeuiov])([^aeuiov0-9]*)([0-9])", "\1\3\2", 0)
# An a or e takes the tone mark even when other vowels follow it.
213 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([ae])([aeuiov]*)([0-9])", "\1\3\2", 0)
# In ou the mark goes on the o.
215 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "(ou)([0-9])", "o\2u", 0)
216 # or the second vowel
217 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([uiov][aeuiov])([uiov])([0-9])", "\1\3\2", 0)
219 # Convert all tones to special characters
# Tone 1
221 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a1", "ā", 0)
222 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e1", "ē", 0)
223 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u1", "ū", 0)
224 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i1", "ī", 0)
225 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o1", "ō", 0)
226 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v1", "ǖ", 0)
# Tone 2
229 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a2", "á", 0)
230 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e2", "é", 0)
231 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u2", "ú", 0)
232 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i2", "í", 0)
233 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o2", "ó", 0)
234 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v2", "ǘ", 0)
# Tone 3
237 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a3", "ǎ", 0)
238 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e3", "ě", 0)
239 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u3", "ǔ", 0)
240 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i3", "ǐ", 0)
241 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o3", "ǒ", 0)
242 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v3", "ǚ", 0)
# Tone 4
245 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a4", "à", 0)
246 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e4", "è", 0)
247 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u4", "ù", 0)
248 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i4", "ì", 0)
249 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o4", "ò", 0)
250 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v4", "ǜ", 0)
253 # Remove tone 0 symbol completely
# The vowel-specific rules run before the blanket removal of "0": if every
# "0" were deleted first, "v0" could never match and v would not become ü.
254 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a0", "a", 0)
255 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e0", "e", 0)
256 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u0", "u", 0)
257 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i0", "i", 0)
258 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o0", "o", 0)
259 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v0", "ü", 0)
# Drop any tone 0 digit that is still left, e.g. one that does not follow a vowel.
260 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "0", "", 0)
262 # Pick best vowel symbols available in cases not caught before
263 # Ugly kludge to get the 1, 3, 0 tone diacritics at least in the neighbourhood
264 .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i(\\[-N0]\^)", "i\\s{_ }\1", 0)
# Publish the result under the name callers read.
266 .pinyin$ = .intermediatePinyin$
# convert_praat_to_utf8 .text$
#
# Replaces Praat backslash trigraphs in .text$ (vowels with diaeresis,
# e with acute accent, sharp s, bullet) by \x-escaped byte sequences of the
# UTF-8 encoding of the corresponding character. The converted text is left
# in convert_praat_to_utf8.text$.
#
# NOTE(review): this relies on replace_regex$ expanding \xHH escapes in the
# replacement string - confirm.
# NOTE(review): the procedure may continue beyond the lines visible here.
269 procedure convert_praat_to_utf8 .text$
270 .text$ = replace_regex$(.text$, "\\a""", "\xc3\xa4", 0)
271 .text$ = replace_regex$(.text$, "\\A""", "\xc3\x84", 0)
272 .text$ = replace_regex$(.text$, "\\o""", "\xc3\xb6", 0)
273 .text$ = replace_regex$(.text$, "\\O""", "\xc3\x96", 0)
274 .text$ = replace_regex$(.text$, "\\u""", "\xc3\xbc", 0)
275 .text$ = replace_regex$(.text$, "\\U""", "\xc3\x9c", 0)
276 .text$ = replace_regex$(.text$, "\\i""", "\xc3\xaf", 0)
277 .text$ = replace_regex$(.text$, "\\I""", "\xc3\x8f", 0)
278 .text$ = replace_regex$(.text$, "\\e""", "\xc3\xab", 0)
279 .text$ = replace_regex$(.text$, "\\E""", "\xc3\x8b", 0)
280 .text$ = replace_regex$(.text$, "\\y""", "\xc3\xbf", 0)
# Capital Y with diaeresis is U+0178, i.e. C5 B8 in UTF-8 (C3 9F is sharp s).
281 .text$ = replace_regex$(.text$, "\\Y""", "\xc5\xb8", 0)
282 .text$ = replace_regex$(.text$, "\\e'", "\xc3\xa9", 0)
283 .text$ = replace_regex$(.text$, "\\E'", "\xc3\x89", 0)
284 .text$ = replace_regex$(.text$, "\\ss", "\xc3\x9f", 0)
# The bullet is U+2022, i.e. E2 80 A2 in UTF-8 (C3 95 is O with tilde).
285 .text$ = replace_regex$(.text$, "\\bu", "\xe2\x80\xa2", 0)