Updated to Praat 6.1.32 and correcting omissions. Only Praat script committed, not...
[sgc2.git] / addons / Create_JS_wordlists.praat
blobc3e7688d657d8b13e5416d8a602e89a8ba56530e
2 # Create_JS_wordlists.praat
3
4 #     Praat script converting wordlist files into Javascript code
5 #     
6 #     Copyright (C) 2016  R.J.J.H. van Son and the Netherlands Cancer Institute
7
8 #     This program is free software; you can redistribute it and/or modify
9 #     it under the terms of the GNU General Public License as published by
10 #     the Free Software Foundation; either version 2 of the License, or
11 #     (at your option) any later version.
12
13 #     This program is distributed in the hope that it will be useful,
14 #     but WITHOUT ANY WARRANTY; without even the implied warranty of
15 #     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 #     GNU General Public License for more details.
17
18 #     You should have received a copy of the GNU General Public License
19 #     along with this program; if not, write to the Free Software
20 #     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
21
# Ask for the directory holding the wordlist Tables and for the name of the
# Javascript file that has to be generated from them.
form Source directory and target file
	sentence Source_directory ../wordlists
	sentence Target_file wordlists_plus.js
endform

# Everything this script writes is encoded as UTF-8
Text writing preferences: "UTF-8"

# Convert all Tables below the source directory into a single Javascript file
call tables2javascript "'source_directory$'" 'target_file$'
32 # Definitions
# Convert every wordlist Table found in .sourceDir$ (and its subdirectories)
# into one Javascript file, .targetFile$.
#
# Arguments:
#   .sourceDir$   directory that is searched for *.Table files
#   .targetFile$  Javascript file to write; an existing file is replaced
#
# The generated file has this layout (the inner part of each entry is written
# by table2objectlist):
#   var wordlists = [
#           [
#           "<table name>", [
#                   [ <fields of row 1> ],
#                   ...
#                   ]
#           ],
#           ...
#   ]
#
# The script exits with an error message when .sourceDir$ is not readable.
procedure tables2javascript .sourceDir$ .targetFile$
	if not fileReadable(.sourceDir$)
		exit Directory not found: '.sourceDir$'
	endif

	# Start a new file with a comment header. The header names the file that
	# is actually being written, i.e., the procedure argument.
	deleteFile(.targetFile$)
	fileappend '.targetFile$' /* '.targetFile$''newline$'
	fileappend '.targetFile$'  * 'newline$'
	fileappend '.targetFile$'  * Tables and this code are licensed under the GNU GPL version 2'newline$'
	fileappend '.targetFile$'  * or later.'newline$'
	fileappend '.targetFile$'  * 'newline$'
	fileappend '.targetFile$'  */ 'newline$'
	fileappend '.targetFile$'  'newline$'

	fileappend '.targetFile$' var wordlists = ['newline$'

	# The Table collecting the names of all converted wordlists is called
	# after the target file: path and extension are stripped.
	.nameStart = rindex(.targetFile$, "/") + 1
	.nameStop = rindex(.targetFile$, ".")
	if .nameStop <= .nameStart
		# There is no extension in the file name itself (no dot at all, a dot
		# only in the directory part, or a name that starts with the dot):
		# use everything after the last "/".
		.nameStop = length(.targetFile$) + 1
	endif
	.nameEnd = .nameStop - .nameStart
	.newTableNameList$ = mid$(.targetFile$, .nameStart, .nameEnd)
	Create Table with column names... '.newTableNameList$' 0 Name

	# Create a list of Tables with Paths
	Create Table with column names... ListOfTables 0 Name Directory
	recursion = 0
	call createListOfTables ListOfTables '.sourceDir$'

	# Iterate over all Tables that were found
	select Table ListOfTables
	.numOfTables = Get number of rows
	for .i to .numOfTables
		select Table ListOfTables
		.table$ = Get value... '.i' Name
		.tableDir$ = Get value... '.i' Directory

		# Get Table
		Read from file... '.tableDir$'/'.table$'
		.tableName$ = selected$("Table")
		if .tableName$ = "wordlist" or .tableName$ = "table"
			# Generic file names carry no information: name the Table after
			# the directory it was read from.
			.nameStart = rindex(.tableDir$, "/") + 1
			.nameEnd = length(.tableDir$) + 1 - .nameStart
			.newTableName$ = mid$(.tableDir$, .nameStart, .nameEnd)
			select Table '.tableName$'
			Rename... '.newTableName$'
			.tableName$ = selected$("Table")
		endif

		# Register the name of this wordlist
		select Table '.newTableNameList$'
		Append row
		.currentTableNum = Get number of rows
		Set string value... '.currentTableNum' Name '.tableName$'

		# Convert table, then close its entry; entries are comma separated
		call table2objectlist '.tableName$' '.targetFile$'
		fileappend '.targetFile$' 'tab$''tab$']
		if .i < .numOfTables
			fileappend '.targetFile$' ,
		endif
		fileappend '.targetFile$' 'newline$'

		select Table '.tableName$'
		Remove
	endfor

	# Close the file
	fileappend '.targetFile$' 'tab$']'newline$''newline$'

	# Clean up the bookkeeping Tables
	select Table '.newTableNameList$'
	plus Table ListOfTables
	Remove
endproc
# Append a single wordlist Table to the Javascript file as the start of an
# array entry.
#
# Arguments:
#   .tableName$   name of the Table object to convert
#   .targetFile$  Javascript file the text is appended to
#
# Text that is written:
#   <tab><tab>[
#   <tab><tab>"<table name, underscores replaced by spaces>", [
#   <tab><tab><tab>["Pinyin","Pinyin with tone marks","Character","Translation","Lesson","Sound"],
#   ...
#   <tab><tab><tab>]
# The closing bracket of the entry itself is NOT written here, the caller
# has to add it.
#
# Columns that are missing from the Table and empty cells are written as "-".
# Backslashes and double quotes in the values are escaped, so every value
# results in a valid Javascript string literal.
procedure table2objectlist .tableName$ .targetFile$
	select Table '.tableName$'
	.numberOfRows = Get number of rows

	# The name shown for this wordlist
	.tableVariableName$ = replace_regex$(.tableName$, "_", " ", 0)

	# Start output
	fileappend '.targetFile$' 'tab$''tab$'['newline$'
	fileappend '.targetFile$' 'tab$''tab$'"'.tableVariableName$'", ['newline$'

	# The columns that are exported, in output order
	.labelList$[1] = "Pinyin"
	.labelList$[2] = "Character"
	.labelList$[3] = "Translation"
	.labelList$[4] = "Lesson"
	.labelList$[5] = "Sound"

	for .row to .numberOfRows
		fileappend '.targetFile$' 'tab$''tab$''tab$'[
		.separator$ = ""
		for .col to 5
			.label$ = .labelList$[.col]
			.colIDX = Get column index: .label$
			if .colIDX > 0
				.value$ = Get value... '.row' '.label$'
			else
				.value$ = "-"
			endif
			if .value$ = ""
				.value$ = "-"
			endif

			# Escape for use inside a double quoted Javascript string.
			# The backslash has to be handled first.
			.jsValue$ = replace$(.value$, "\", "\\", 0)
			.jsValue$ = replace$(.jsValue$, """", "\""", 0)
			fileappend '.targetFile$' '.separator$'"'.jsValue$'"
			.separator$ = ","

			# The Pinyin field is followed by its tone-mark transcription,
			# which is derived from the unescaped value.
			if .label$ = "Pinyin"
				call numbers2pinyin '.value$'
				.jsPinyin$ = replace$(numbers2pinyin.pinyin$, "\", "\\", 0)
				.jsPinyin$ = replace$(.jsPinyin$, """", "\""", 0)
				fileappend '.targetFile$' '.separator$'"'.jsPinyin$'"
			endif
		endfor
		fileappend '.targetFile$' ]
		if .row < .numberOfRows
			fileappend '.targetFile$' ,
		endif
		fileappend '.targetFile$' 'newline$'
	endfor

	# Close the list of rows
	fileappend '.targetFile$' 'tab$''tab$''tab$']'newline$'
endproc
# Collect all *.Table files found in .topDirectory$ and, recursively, in all
# directories below it.
#
# Arguments:
#   .listName$      name of the Table that receives one row per file found;
#                   it must have the columns Name and Directory
#   .topDirectory$  directory where the search starts
#
# Recursion is emulated with the global counter "recursion": every variable
# that has to survive a nested call gets the current recursion level inserted
# into its name. Variables without the level in their name are overwritten by
# nested calls and are only valid until the next call.
# Callers should set recursion to 0 before the first call.
recursion = 0
procedure createListOfTables .listName$ .topDirectory$
	# Enter a new level and store private copies of the arguments
	recursion += 1
	.listName'recursion'$ = .listName$
	.topDirectory'recursion'$ = .topDirectory$

	# Part 1: the Table files in this directory. No nested call is made in
	# this part, so plain variables and the arguments can be used directly.
	Create Strings as file list... Files '.topDirectory$'/*.Table
	.numOfFiles = Get number of strings
	for .i to .numOfFiles
		select Strings Files
		.fileName$ = Get string... '.i'

		# Add a row with the name and location of this file
		select Table '.listName$'
		Append row
		.lastRow = Get number of rows
		Set string value... '.lastRow' Name '.fileName$'
		Set string value... '.lastRow' Directory '.topDirectory$'
	endfor
	select Strings Files
	Remove

	# Part 2: descend into the subdirectories. Loop counter, loop limit and
	# directory name are all specific to the current level.
	.parentDirectory$ = .topDirectory'recursion'$
	Create Strings as directory list... Directories '.parentDirectory$'
	.numOfDirectories'recursion' = Get number of strings
	for .i'recursion' to .numOfDirectories'recursion'
		select Strings Directories
		.dirIndex = .i'recursion'
		.subDirectory$ = Get string... '.dirIndex'
		# Look the parent up again, the previous nested call has overwritten
		# all variables that are not level specific
		.parentDirectory$ = .topDirectory'recursion'$
		call createListOfTables '.listName$' '.parentDirectory$'/'.subDirectory$'
	endfor

	# The nested calls have removed their own lists, so this selects the
	# directory list of the current level
	select Strings Directories
	Remove

	# Leave this level
	recursion -= 1
endproc
# Convert pinyin with tone numbers, e.g., ni3hao3, into pinyin with tone
# marks, e.g., nǐhǎo. The letter v stands for ü.
#
# Argument:
#   .numberstext$  pinyin text with a tone digit (0-4) after each syllable
# Result:
#   .pinyin$       the converted text; callers read numbers2pinyin.pinyin$
#
# First every tone digit is moved directly behind the vowel that has to carry
# the tone mark, then each vowel+digit pair is replaced by the marked vowel.
# Tone 0 (neutral) gets no mark: the digit is removed and v becomes ü.
procedure numbers2pinyin .numberstext$
	.intermediatePinyin$ = .numberstext$
	# Add an apostrophe between syllables where a vowel follows the tone digit
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([aeuiov])([0-9])([aeuiov])", "\1\2'\3", 0)
	# Move numbers to the nucleus vowel
	# To the vowel, skipping the consonants at the end of the syllable
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([aeuiov])([^aeuiov0-9]*)([0-9])", "\1\3\2", 0)
	# Either a/e
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([ae])([aeuiov]*)([0-9])", "\1\3\2", 0)
	# Or the o in /ou/
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "(ou)([0-9])", "o\2u", 0)
	# or the second vowel
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([uiov][aeuiov])([uiov])([0-9])", "\1\3\2", 0)

	# Convert all tones to special characters
	# Tone 1
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a1", "ā", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e1", "ē", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u1", "ū", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i1", "ī", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o1", "ō", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v1", "ǖ", 0)

	# Tone 2
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a2", "á", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e2", "é", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u2", "ú", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i2", "í", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o2", "ó", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v2", "ǘ", 0)

	# Tone 3
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a3", "ǎ", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e3", "ě", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u3", "ǔ", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i3", "ǐ", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o3", "ǒ", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v3", "ǚ", 0)

	# Tone 4
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a4", "à", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e4", "è", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u4", "ù", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i4", "ì", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o4", "ò", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v4", "ǜ", 0)

	# Tone 0
	# The vowels have to be handled before the remaining 0 digits are
	# removed, otherwise v0 can never be converted to ü.
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a0", "a", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e0", "e", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u0", "u", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i0", "i", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o0", "o", 0)
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v0", "ü", 0)
	# Remove any tone 0 symbol that is left
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "0", "", 0)

	# Pick best vowel symbols available in cases not caught before
	# Ugly kludge to get the 1, 3, 0 tone diacritics at least in the neighbourhood
	# NOTE(review): this pattern looks for Praat backslash sequences after an
	# i; none of the replacements above produces such a sequence - confirm
	# whether this line is still needed.
	.intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i(\\[-N0]\^)", "i\\s{_ }\1", 0)

	.pinyin$ = .intermediatePinyin$
endproc
# Replace Praat backslash sequences for accented letters and some symbols by
# the UTF-8 byte sequence of the character, written as \x escapes.
#
# Argument and result:
#   .text$  text to convert; the converted text is stored in the same
#           variable, callers read convert_praat_to_utf8.text$
#
# Each pattern matches the literal backslash sequence: "\\" is a regular
# expression for one backslash and "" is a double quote inside a Praat string.
# NOTE(review): how replace_regex$ treats \x escapes in the replacement text
# is not visible in this file - confirm against the Praat version in use.
procedure convert_praat_to_utf8 .text$
	# Umlaut / diaeresis: ä Ä ö Ö ü Ü ï Ï ë Ë ÿ Ÿ
	.text$ = replace_regex$(.text$, "\\a""", "\xc3\xa4", 0)
	.text$ = replace_regex$(.text$, "\\A""", "\xc3\x84", 0)
	.text$ = replace_regex$(.text$, "\\o""", "\xc3\xb6", 0)
	.text$ = replace_regex$(.text$, "\\O""", "\xc3\x96", 0)
	.text$ = replace_regex$(.text$, "\\u""", "\xc3\xbc", 0)
	.text$ = replace_regex$(.text$, "\\U""", "\xc3\x9c", 0)
	.text$ = replace_regex$(.text$, "\\i""", "\xc3\xaf", 0)
	.text$ = replace_regex$(.text$, "\\I""", "\xc3\x8f", 0)
	.text$ = replace_regex$(.text$, "\\e""", "\xc3\xab", 0)
	.text$ = replace_regex$(.text$, "\\E""", "\xc3\x8b", 0)
	.text$ = replace_regex$(.text$, "\\y""", "\xc3\xbf", 0)
	# Capital Y with diaeresis is U+0178, i.e., C5 B8 in UTF-8
	# (C3 9F is the sharp s handled below)
	.text$ = replace_regex$(.text$, "\\Y""", "\xc5\xb8", 0)
	# Acute accent: é É
	.text$ = replace_regex$(.text$, "\\e'", "\xc3\xa9", 0)
	.text$ = replace_regex$(.text$, "\\E'", "\xc3\x89", 0)
	# Sharp s: ß
	.text$ = replace_regex$(.text$, "\\ss", "\xc3\x9f", 0)
	# Bullet is U+2022, i.e., E2 80 A2 in UTF-8 (C3 95 is a capital O with tilde)
	.text$ = replace_regex$(.text$, "\\bu", "\xe2\x80\xa2", 0)
endproc