3 # Load all CoG reference files and compare the recorded test sound to them
4 # Choose the reference file with the lowest distance
5 # 'exclude$' contains a regexp pattern that deselects unwanted reference files
6 # The number of coefficients should match those in the reference files
7 # (which should be readable from a Praat script, but aren't)
10 # word test_word wav/duo1shao3/duo1shao3_duo1shao3_F20DUTB1BS01_2006-12-11T2-00.wav
11 # word reference_dir ../wordlists/CoGMandarinSounds
16 word test_word ../../test/cha2_cha2_252_Sat-Mar-17-22-08-22-2007.wav
17 word reference_dir ../wordlists/CoGMandarinSounds
20 referenceCOGExt$ = "cog"
22 # Read the procedure to calculate the CoG
24 # Note that we preserve the initial but silence any "noise" at the end of the final
24 include CoGcalculation.praat
26 # Bias: threshold for the Z-normalized difference between the correct and the smallest distance
30 #keepIntermediates = 0
33 final$ = replace_regex$(pinyin$, "^([^uoaeiv]*)([uoaeiv]+[ngmr]*[0-9])([a-zA-Z0-9]*)$", "\2", 0)
34 initial$ = replace_regex$(pinyin$, "^([^uoaeiv]*)([uoaeiv]+[ngmr]*[0-9])([a-zA-Z0-9]*)$", "\1", 0)
36 if rindex_regex(pinyin$, "^'initial$''final$'([a-zA-Z]+[0-9])") > 0
37 secondSyll$ = replace_regex$(pinyin$, "^'initial$''final$'([a-zA-Z]+[0-9])", "\1", 0)
40 # printline 'initial$'+'final$'+'secondSyll$'
43 if test_word$ <> "" and test_word$ <> "REUSEMFCC"
44 Read from file... 'test_word$'
55 plosives$ = "([ptkbdg])"
56 fricatives$ = "([fsxh]|sh)"
57 affricates$ = "([zcqj]|zh|ch)"
58 semivowels$ = "([ywlr])"
67 # Determine manner of articulation and adapt bias
68 if rindex_regex(pinyin$, "^'nasals$'") > 0
71 elsif rindex_regex(pinyin$, "^'plosives$'") > 0
74 elsif rindex_regex(pinyin$, "^'fricatives$'") > 0
77 elsif rindex_regex(pinyin$, "^'affricates$'") > 0
80 elsif rindex_regex(pinyin$, "^'semivowels$'") > 0
87 Create Strings as file list... ReferenceList 'reference_dir$'/*'final$'.'referenceCOGExt$'
88 numberOfReferences = Get number of strings
90 # Convert input to CoG
91 if test_word$ <> "REUSEMFCC"
97 inputMediaCoG = Get quantile... 0 0 0.5 Hertz
102 smallestDistance=999999
107 choiceReference$ = pinyin$
108 for i from 1 to numberOfReferences
109 select Strings ReferenceList
110 inFile$ = Get string... 'i'
111 referenceName$ = replace_regex$(inFile$, "([^.]+)."+referenceCOGExt$+"$", "\1", 0)
114 # Special cases first!
115 if initial$ = "g" and rindex_regex(inFile$, "^h'final$'") > 0
117 elsif initial$ = "x" and rindex_regex(inFile$, "^k'final$'") > 0
119 elsif (isNasal and rindex_regex(inFile$, "^'nasals$''final$'") > 0)
121 elsif (isFricative and rindex_regex(inFile$, "^'fricatives$''final$'") > 0)
123 elsif (isPlosive and rindex_regex(inFile$, "^'plosives$''final$'") > 0)
125 elsif (isAffricate and rindex_regex(inFile$, "^'affricates$''final$'") > 0)
127 elsif (isSemivowel and rindex_regex(inFile$, "^'semivowels$''final$'") > 0)
129 elsif (isEmpty) and rindex_regex(inFile$, "^'final$'") > 0
134 Read from file... 'reference_dir$'/'inFile$'
136 currentMediaCoG = Get quantile... 0 0 0.5 Hertz
137 factorMedian = inputMediaCoG/currentMediaCoG
138 Formula... self*factorMedian
140 select Pitch 'referenceName$'
142 noprogress To DTW... 24.0 10.0 yes yes no restriction
144 distance = Get distance (weighted)
146 countDistance = countDistance + 1
147 sumDistance = sumDistance + distance
148 sumSqrDistance = sumSqrDistance + distance^2
150 if distance < smallestDistance
151 smallestDistance = distance
152 choiceReference$ = referenceName$
155 if referenceName$ = pinyin$
156 correctDistance = distance
159 logline$ = "'pinyin$' 'inFile$' 'referenceName$' 'distance' 'smallestDistance' 'choiceReference$'"
161 logline$ = logline$+newline$
162 logline$ >> initialrecognitionlog.txt
167 plus Pitch 'referenceName$'
174 meanDistance = sumDistance / countDistance
175 varDistance = (sumSqrDistance - sumDistance^2/countDistance)/(countDistance - 1)
176 stdDistance = sqrt(varDistance)
177 diffDistance = correctDistance - smallestDistance
178 zDistance = diffDistance/stdDistance
180 if zDistance < biasDistance
181 choiceReference$ = pinyin$
182 smallestDistance = correctDistance
188 if initial$ = "d" and rindex_regex(choiceReference$, "^g'final$'") > 0
189 choiceReference$ = "g'final$'"
193 logline$ = "CoG Match: 'pinyin$' <== 'choiceReference$' corr='correctDistance' Z='zDistance'"
194 logline$ = logline$+newline$
195 logline$ >> initialrecognitionlog.txt
197 choiceReference$ > lastInitialRecognitionResult.txt
201 select Strings ReferenceList
202 if test_word$ <> "" and test_word$ <> "REUSEMFCC"