5 sentence Recorded_audio aa.wav
6 comment Entering -999 in any field disables the feature or change
7 real Pitch 70 (= Hz, mean)
8 real Pitch_SD 15 (= % of mean)
9 real Duration 1.3 (= mult. factor)
10 real Syllable_rate 3.4 (= syll/sec, overrides Duration)
12 real Bubbles 1 (= fraction, inactive)
13 real Bubbles_SNR 10 (= dB SNR)
16 positive Voicing_floor_(dB) 15 (= below maximum)
20 ########################################################################
22 # VoiceConversion.praat
24 # Change the input speech to resemble Tracheoesophageal speech.
25 # Changes the Pitch (F0) and pitch movements, duration. Filtered noise
26 # is added as well as filtered "bubble" sounds.
27 # Increase the Jitter and Shimmer of a speech recording to the
28 # number given. Cannot reduce Jitter or Shimmer.
29 # Note that Jitter and Shimmer are ill-defined in anything but
32 # Uses the To PointProcess (periodic, cc) to calculate the jitter
33 # and To PointProcess (periodic, peaks): 60, 300, "yes", "yes"
34 # to change the timing of the periods.
36 # Periods are moved with Overlap-and-Add
38 # Shimmer is adapted using additive noise over an intensity tier and
39 # adapting each period individually. Periods are determined with the
40 # To PointProcess (periodic, peaks) pulses.
42 ########################################################################
44 # Copyright (C) 2016-2017 NKI-AVL, R. J. J. H. van Son
47 # This program is free software: you can redistribute it and/or modify
48 # it under the terms of the GNU General Public License as published by
49 # the Free Software Foundation, either version 3 of the License, or
50 # (at your option) any later version.
52 # This program is distributed in the hope that it will be useful,
53 # but WITHOUT ANY WARRANTY; without even the implied warranty of
54 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
55 # GNU General Public License for more details.
57 # You should have received a copy of the GNU General Public License
58 # along with this program. If not, see <http://www.gnu.org/licenses/>.
60 # Full license text is available at:
61 # http://www.gnu.org/licenses/gpl-3.0.html
63 ########################################################################
65 # Input parameters (<=-999 means "do not change"):
67 # Input file A file name (with full path). If a Sound object is selected, that will be used instead
68 # Pitch Average pitch of the new speech in Hz [F'(t) = Fnew/Fold * F(t)]
69 # Pitch SD Standard deviation of the Pitch of the new speech in % of the new mean Pitch (compresses pitch movements)
70 # [SD'(t) = SDnew/SDold * (F(t) - Faverage) + Faverage]
71 # Duration Factor with which to multiply the duration
72 # Syllable_rate Articulation rate in syll/sec, overrides Duration (= SRold / SRnew, ignored if ratio < 1)
73 # HNR Signal to Noise ratio of new noise added to obtain the HNR given
74 # Bubbles Fraction of time bubble sounds are the source (0-1, 0 = disable). Use source "TE_source_bubbles.wav"
75 # Bubbles SNR Signal to Noise ratio of bubble sounds added (in dB)
76 # Jitter New jitter in %
77 # Shimmer New Shimmer in %
78 # Voicing floor Lowest level of sound still considered voiced, in dB below the maximum
80 # Help Print this text and exit
83 # The input sound converted according to the specifications
85 # Print debugging information
90 # A Praat Sound object with the transformed speech
93 # praat VoiceConversion.praat Speech/Example1.wav 70 15 1.3 5 5 15 5 10 15 no
# Help text: one line per input parameter, followed by a description of the output.
# Pitch SD is described as a percentage of the new mean pitch because
# change_pitch_duration derives the target SD as .pitchFraction / 100 * .pitch.
101 printline Input parameters (<= -999 means do not change feature):
102 printline Input file'tab$''tab$'A file name (with full path). If a Sound object is selected, that will be used instead
103 printline Pitch'tab$''tab$''tab$'Average pitch of the new speech in Hz [F'(t) = Fnew/Fold * F(t)]
104 printline Pitch SD'tab$''tab$'Standard deviation of the Pitch of the new speech in % of the new mean Pitch (compresses pitch movements)
105 printline 'tab$''tab$''tab$''tab$'[SD'(t) = SDnew/SDold * (F(t) - Faverage) + Faverage]
106 printline Duration'tab$''tab$'Factor with which to multiply the duration
107 printline Syllable_rate'tab$'Articulation rate in syll/sec, overrides Duration (= SRold / SRnew, ignored if ratio < 1)
108 printline HNR'tab$''tab$''tab$''tab$'Signal to Noise ratio of new noise added to obtain the HNR given
109 printline Bubbles'tab$''tab$''tab$'Fraction of time bubble sounds are the source (0-1, 0 = disable). Use source "TE_source_bubbles.wav"
110 printline Bubbles SNR'tab$''tab$'Signal to Noise ratio of bubble sounds added (in dB)
111 printline Jitter'tab$''tab$''tab$'New jitter in %
112 printline Shimmer'tab$''tab$''tab$'New Shimmer in %
113 printline Voicing floor'tab$'Lowest level of sound still considered voiced, in dB below the maximum
115 printline Help'tab$''tab$''tab$'Print this text and exit
118 printline The input sound converted according to the specifications
# Main script: pick the input Sound, set up file names and the voicing threshold,
# remember the input intensity and run the conversion.
# A selected Sound object takes precedence over the file named in recorded_audio$.
125 if numberOfSelected("Sound") > 0
126 .recordedSound = selected("Sound")
127 elsif recorded_audio$ <> "" and fileReadable(recorded_audio$)
128 .recordedSound = Read from file: recorded_audio$
129 Rename: "RecordedSpeech"
# File names of the bubble recordings. te_source_bubbles_name$ is passed to
# synth_with_source_signal; bubblesAudioName$ is only used by the commented-out add_bubbles call.
132 bubblesAudioName$ = "bubbles.wav"
134 te_source_bubbles_name$ = "TE_source_bubbles.wav"
# The form value is a positive number of dB below the maximum; the procedures expect it negated.
136 .thresshold = -voicing_floor
# Store the intensity of the input; apply_overlap_add scales its result to this value.
139 select .recordedSound
140 global.setIntensity = Get intensity (dB)
142 call convert_speechOverlapAndAdd .recordedSound .thresshold jitter shimmer pitch pitch_SD duration syllable_rate hNR bubbles bubbles_SNR
144 # Definitions of functions
# Convert a recording step by step: duration/pitch change, voicing analysis, additive noise,
# bubble source, jitter and shimmer (overlap-add), and finally mixing in noise and bubbles.
# .recordedSound: id of the Sound to convert
# .thresshold: voicing threshold, passed on to extractVoicingParameters
# .jitter: new jitter, passed on to set_jitter
# .shimmer: new shimmer, passed on to apply_overlap_add
# .pitch, .pitch_SD: new pitch and pitch SD, passed on to change_pitch_duration
# .durationFactor: duration multiplier; recalculated from the syllable rate when .newSyllable_rate > 0
# .newSyllable_rate: target articulation rate (syll/sec)
# .newHNR: ratio (dB) used by add_sounds when mixing in the additive noise
# .bubble_fraction, .bubble_snr: bubble source settings, used when .bubble_fraction > 0 and .bubble_snr > -999
# Ends with the resulting Sound (.result) selected.
147 procedure convert_speechOverlapAndAdd .recordedSound .thresshold .jitter .shimmer .pitch .pitch_SD .durationFactor .newSyllable_rate .newHNR .bubble_fraction .bubble_snr
148 ###################################################################
150 # Get the syllable rate and calculate durationFactor.
151 # The durationFactor = 1 if the result is smaller than 1
153 ###################################################################
154 if .newSyllable_rate > 0
155 call syllable_nuclei -25 2 0.3 1 .recordedSound
# Only trust the measured rate when more than one nucleus was found and the
# average syllable duration is below 1 second.
156 if syllable_nuclei.voicedcount > 1 and syllable_nuclei.asd < 1
157 .durationFactor = syllable_nuclei.articulationrate / .newSyllable_rate
158 if .durationFactor < 1
166 ###################################################################
168 # Change the Pitch, pitch SD, and duration
170 ###################################################################
171 call change_pitch_duration .recordedSound .pitch .pitch_SD .durationFactor
172 .newPitchSound = selected("Sound")
173 .duration = Get total duration
174 .sampleFreq = Get sampling frequency
176 ###################################################################
178 # Get the new VUV textgrid, pulses, and intensity
180 ###################################################################
181 call extractVoicingParameters .newPitchSound .thresshold
182 .recordedTextGrid = selected("TextGrid")
183 .recordedPulses = selected("PointProcess")
184 .recordedInt = selected("Intensity")
186 ###################################################################
188 # Create a version with noise as a source
190 ###################################################################
192 call create_additive_noise .newPitchSound .recordedTextGrid
193 .additiveNoise = selected("Sound")
# All-zero Sound (formula "0") with the duration and sampling frequency of the converted speech
195 .additiveNoise = Create Sound from formula: "WhiteNoise", 1, 0, .duration, .sampleFreq, "0"
198 ###################################################################
200 # Create a version with "bubbles" as a source
202 ###################################################################
203 if .bubble_fraction > 0 and .bubble_snr > -999
204 call synth_with_source_signal '.newPitchSound' '.recordedPulses' .bubble_fraction .bubble_snr 'te_source_bubbles_name$'
205 .additiveBubbles = selected("Sound")
# All-zero Sound (formula "0") with the duration and sampling frequency of the converted speech
207 .additiveBubbles = Create Sound from formula: "WhiteNoise", 1, 0, .duration, .sampleFreq, "0"
210 #call add_bubbles '.newPitchSound' '.bubble_fraction' '.bubble_snr' '.recordedTextGrid' 'bubblesAudioName$'
211 #.additiveBubbles = selected("Sound")
213 ###################################################################
215 # Change Jitter and Shimmer, use CC to determine jitter and Peaks
216 # to change the periods.
218 ###################################################################
219 selectObject: .newPitchSound
220 .newPulsesPeaks = To PointProcess (periodic, peaks): 60, 300, "yes", "yes"
# set_jitter changes a copy of the peak pulses; the unchanged peaks stay available as the
# source pulses for the overlap-add.
222 selectObject: .newPulsesPeaks
223 .newPointProcess = Copy: "New_Pulses"
224 call set_jitter .jitter .newPointProcess .recordedPulses
225 call apply_overlap_add .newPitchSound .newPulsesPeaks .recordedTextGrid .newPointProcess .shimmer
226 .newSound = selected("Sound")
228 ###################################################################
232 ###################################################################
# Measure jitter, shimmer and articulation rate before and after the conversion for the report below
235 selectObject: .recordedPulses
236 .old_jitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
237 selectObject: .newPointProcess
238 .newPPjitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
239 selectObject: .recordedSound
241 .old_amplitude = To AmplitudeTier (period): 0, 0, 0.0001, 0.03, 2
242 .old_shimmer = Get shimmer (local): 0.0001, 0.03, 2
245 call syllable_nuclei -25 2 0.3 1 .recordedSound
246 .oldArticulationRate = syllable_nuclei.articulationrate
247 call syllable_nuclei -25 2 0.3 1 .newSound
248 .newArticulationRate = syllable_nuclei.articulationrate
250 selectObject: .newSound
251 .pointP = To PointProcess (periodic, cc): 60, 300
252 .new_jitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
254 selectObject: .newSound
256 .new_amplitude = To AmplitudeTier (period): 0, 0, 0.0001, 0.03, 2
257 .new_shimmer = Get shimmer (local): 0.0001, 0.03, 2
259 appendInfoLine: "New Jitter: '.new_jitter:1%' ('.old_jitter:1%' ~> '.newPPjitter:1%')"
260 appendInfoLine: "New Shimmer: '.new_shimmer:1%' ('.old_shimmer:1%')"
261 appendInfoLine: "New Syll rate: '.newArticulationRate:1' ('.oldArticulationRate:1')"
# Select the temporary measurement objects created above
263 selectObject: .old_amplitude, .pointP, .new_amplitude
267 ###################################################################
269 # Add noise to result
271 ###################################################################
272 call add_sounds .newSound .additiveNoise .newHNR
273 .resultNoise = selected("Sound")
276 ###################################################################
278 # Add bubbles to result
280 ###################################################################
281 call add_sounds .resultNoise .additiveBubbles .bubble_snr
282 .result = selected("Sound")
285 ###################################################################
289 ###################################################################
# Select all intermediate objects; .recordedSound and .result are not part of this selection
290 selectObject: .newPitchSound, .recordedTextGrid, .recordedPulses, .recordedInt, .newPointProcess, .newPulsesPeaks, .newSound, .additiveNoise, .additiveBubbles, .resultNoise
293 selectObject: .result
# Change the mean pitch, the pitch SD and the duration of a Sound using a Manipulation object.
# .sound: id of the Sound to change
# .pitch: new mean pitch
# .pitchFraction: new pitch SD as a percentage of .pitch
# .durationFactor: factor the duration tier is multiplied with (applied when > 0)
# The id of the new Sound is stored in .newSound.
297 procedure change_pitch_duration .sound .pitch .pitchFraction .durationFactor
299 .duration = Get total duration
# Manipulation with a 0.01 s time step and a 70-300 Hz pitch range
301 .manipulation = To Manipulation: 0.01, 70, 300
302 .pitchTier = Extract pitch tier
303 .currentPitch = Get mean (points): 0, 0
# Convert the percentage into an absolute SD relative to the new mean pitch
304 .pitch_SD = .pitchFraction / 100 * .pitch
307 .durationTier = Extract duration tier
310 if .durationFactor > 0
311 .numPoints = Get number of points
315 Formula: "self*'.durationFactor'"
318 Replace duration tier
# Scale all frequencies so the mean moves from .currentPitch to .pitch
323 .factor = (.pitch / .currentPitch)
324 Multiply frequencies: 0, .duration, .factor
325 .currentSD = Get standard deviation (points): 0, 0
# Scale the excursions around the new mean so the SD becomes .pitch_SD
328 .factor = .pitch_SD / .currentSD
329 Formula: "'.pitch' + (self - '.pitch') * '.factor'"
# Resynthesize when a pitch was measured or a duration factor is given; the Copy below is the alternative result
338 if .currentPitch > 0 or .durationFactor > 0
340 .newSound = Get resynthesis (overlap-add)
343 .newSound = Copy: "New Sound"
353 # Get all the voice related measures
# .recordedSound: id of the Sound to analyse
# .thresshold: silence threshold (dB), used for To TextGrid (silences)
# Creates a PointProcess "RecordedPulses", a VUV TextGrid "RecordedVoicing" and an
# Intensity "RecordedIntensity". Voiced intervals that reach into a "silent" interval
# are relabelled or have their boundaries shifted.
354 procedure extractVoicingParameters .recordedSound .thresshold
355 # Thresshold: the lowest level of voiced sounds
356 select .recordedSound
357 .pointPcc = To PointProcess (periodic, cc): 60, 300
358 Rename: "RecordedPulses"
359 .textGrid = To TextGrid (vuv): 0.02, 0.01
360 Rename: "RecordedVoicing"
361 .numIntervals = Get number of intervals: 1
363 # Correct voicing boundaries
364 select .recordedSound
365 .intensity = To Intensity: 100, 0, "yes"
366 Rename: "RecordedIntensity"
367 .silences = To TextGrid (silences): .thresshold, 0.1, 0.05, "silent", "sounding"
# First pass: check the start of each interval against the silences TextGrid
370 for .i to .numIntervals
372 .label$ = Get label of interval: 1, .i
374 .start = Get starting point: 1, .i
375 .end = Get end point: 1, .i
377 # Starting point of voiced interval
379 .s = Get interval at time: 1, .start
380 .sLabel$ = Get label of interval: 1, .s
381 if .sLabel$ = "silent"
382 .sStart = Get starting point: 1, .s
383 .sEnd = Get end point: 1, .s
386 Set interval text: 1, .i, "U"
387 # Shift boundaries: Insert&Remove
388 Insert boundary: 1, .sEnd
389 Set interval text: 1, .i+1, "V"
391 Set interval text: 1, .i, ""
392 Remove left boundary: 1, .i
395 # Low intensity, unvoiced
396 Set interval text: 1, .i, "U"
# Second pass: check the end of each interval against the silences TextGrid
403 for .i to .numIntervals
405 .label$ = Get label of interval: 1, .i
407 .start = Get starting point: 1, .i
408 .end = Get end point: 1, .i
410 # End point of voiced interval
412 .s = Get interval at time: 1, .end
413 .sLabel$ = Get label of interval: 1, .s
414 if .sLabel$ = "silent"
415 .sStart = Get starting point: 1, .s
416 .sEnd = Get end point: 1, .s
419 Set interval text: 1, .i, "U"
420 # Shift boundaries: Insert&Remove
421 Insert boundary: 1, .sStart
422 Set interval text: 1, .i, "V"
424 Set interval text: 1, .i+1, ""
425 Remove right boundary: 1, .i+1
428 # Low intensity, unvoiced
429 Set interval text: 1, .i, "U"
443 # LPC analysis resynthesis with white noise as the new source. Only
444 # resynthesize the voiced parts.
# .sound: id of the Sound that supplies the LPC filter and the source intensity
# .vuvTextGrid: VUV TextGrid; intervals labelled U are set to zero in the noise
# Ends with the noise Sound (.additiveNoiseSound) selected.
445 procedure create_additive_noise .sound .vuvTextGrid
447 .duration = Get total duration
448 .sampleFreq = Get sampling frequency
# -1 marks "no noise created yet"; checked again after the synthesis
450 .additiveNoiseSound = -1
# LPC analysis on a copy resampled to 10 kHz
454 .downsampled = Resample: 10000, 50
455 .lpc = To LPC (autocorrelation): 10, 0.025, 0.005, 50
# The inverse filtered signal supplies the intensity contour of the source
457 .source = Filter (inverse)
458 .sourceInt = To Intensity: 70, 0, "yes"
459 .sourceIntTier = To IntensityTier (peaks)
461 # Create additive noise
462 .noise = Create Sound from formula: "WhiteNoise", 1, 0, .duration, .sampleFreq, "randomGauss(0,0.1)"
464 .filteredNoise = Filter: "no"
466 .additiveNoiseSoundTMP = Multiply: "yes"
467 call set_VUV_to_zero .additiveNoiseSoundTMP .vuvTextGrid U
# Back to the sampling frequency of the input Sound
468 .additiveNoiseSound = Resample: .sampleFreq, 50
470 selectObject: .noise, .filteredNoise, .additiveNoiseSoundTMP
473 selectObject: .downsampled, .lpc, .source, .sourceInt, .sourceIntTier
# No noise was created: use an all-zero Sound instead
476 if .additiveNoiseSound <= 0
477 .additiveNoiseSound = Create Sound from formula: "AdditiveNoise", 1, 0, .duration, .sampleFreq, "0"
480 select .additiveNoiseSound
483 # Add sounds. If either sound does not exist, use the other.
484 # If the s1/s2 ratio <= -999, copy the first sound (if it exists)
485 # else, copy the second source.
# .sound1, .sound2: ids of the Sounds to add (a value <= 0 means "does not exist")
# .s1_s2ratioDB: wanted intensity difference sound1 - sound2 in dB. Any value <= -999
#                disables the mixing, in line with the "<= -999 means do not change"
#                convention used for the other input parameters.
# Ends with the resulting Sound (.addedSound) selected.
486 procedure add_sounds .sound1 .sound2 .s1_s2ratioDB
492 if .sound1 <= 0 and .sound2 <= 0
493 exitScript: "add_sounds: No sounds to add"
# Mix only when a real ratio was given; every value at or below -999 is the "disabled" sentinel
496 if .s1_s2ratioDB > -999
499 .tmp1 = Copy: "Sound1"
500 .int1 = Get intensity (dB)
504 .duration = Get total duration
505 .sampleFreq = Get sampling frequency
510 .tmp2 = Copy: "Sound2"
511 .int2 = Get intensity (dB)
515 .duration = Get total duration
516 .sampleFreq = Get sampling frequency
# All-zero stand-ins with the duration and sampling frequency measured above
520 .tmp1 = Create Sound from formula: "BubblesNoise1", 1, 0, .duration, .sampleFreq, "0"
522 .tmp2 = Create Sound from formula: "BubblesNoise2", 1, 0, .duration, .sampleFreq, "0"
# Split the required correction evenly over both sounds: raise one by half, lower the other by half
525 if .int1 - .int2 <> .s1_s2ratioDB
526 .ratio = .s1_s2ratioDB - (.int1 - .int2)
528 Scale intensity: .int1 + .ratio / 2
530 Scale intensity: .int2 - .ratio / 2
# Sum the two sounds by combining them to stereo and converting back to mono
532 selectObject: .tmp1, .tmp2
533 .stereo = Combine to stereo
534 .addedSound = Convert to mono
536 selectObject: .stereo, .tmp1, .tmp2
# No mixing: the result is a plain copy of one of the inputs
540 selectObject: .sound1
542 selectObject: .sound2
545 .addedSound = Copy: "BubblesNoise"
548 selectObject: .addedSound
552 # Set Jitter to a specified number
554 # Ti = ti - ti-1 (interval i)
555 # Jitter (absolute) is Sum[ abs(Ti - Ti-1) ] / N-1
556 # Jitter = Jitter (absolute) / mean(Ti)
558 # For a Normal distribution
559 # E(|X|) = sqrt(2/pi) * stdev(X)
562 # E(Ti^2) = var(Ti) + E(Ti)^2
563 # E(Ti*Ti-1) = cov(Ti, Ti-1) + E(Ti)^2 = cor(Ti, Ti-1)*var(Ti) + E(Ti)^2
564 # var(Ti - Ti-1) = E(Ti^2 - 2*Ti*Ti-1 + Ti-1^2)
565 # = 2*E(Ti^2) - 2*E(Ti*Ti-1)
566 # = 2*[ var(Ti) * (1 - cor(Ti, Ti-1)) ]
568 # Combine, assuming a Normal distribution:
569 # Jitter = E(|Ti - Ti-1|) / E(Ti)
570 # = sqrt(2/pi) * stdev(Ti - Ti-1) / mean(Ti)
571 # = sqrt(2/pi * var(Ti - Ti-1)) / mean(Ti)
572 # = sqrt[ 4/pi * ( var(Ti) * (1 - cor(Ti, Ti-1)) ) ] / mean(Ti)
574 # Change Ti -> T'i; Jitter -> a*Jitter while keeping mean(Ti) = mean(T'i) constant
575 # ei = (ti + ti-2)/2 - ti-1
576 # Jitter' = a * Jitter
577 # = a * sqrt[ 2/pi * var(Ti - Ti-1) ] / mean(Ti)
579 # => var(T'i - T'i-1) = a^2 * var(Ti - Ti-1)
580 # = a^2 * E[ (Ti - Ti-1)^2 ]
581 # = a^2 * E[ (ti - ti-1 - ti-1 + ti-2)^2 ]
582 # = a^2 * 4 * E[ ((ti + ti-2)/2 - ti-1)^2 ]
583 # = a^2 * 4 * E[ ei^2 ]
584 # = 4 * E[ (a*ei)^2 ]
587 # Generalizing, var(T'i - T'i-1) = 2*(var(ti-1) + var(ti) + var(ti+1))
588 # To increase Jitter -> Jitter'
589 # 1) Determine var(Ti - Ti-1) = (Jitter * mean(Ti))^2 * pi / 2
590 # 2) Calculate var(T'i - T'i-1) = (Jitter' * mean(T'i))^2 * pi / 2
591 # 3) Determine var to add:
592 # add_var(Ti - Ti-1) = var(T'i - T'i-1) - var(Ti - Ti-1)
593 # 4) Var of Noise to add per ti: add_var(ti) = add_var(Ti - Ti-1)/(2*3)
594 # 5) Sd of Noise to add per ti: add_sd(ti) = sqrt(add_var(ti))
597 # Converts .pulses into pulses with new Jitter
# .newJitter: wanted jitter; only applied when it is larger than the measured jitter
# .pulses: id of the PointProcess whose points are moved
# .pulsesCC: id of the cross-correlation PointProcess
# Each point is shifted by Gaussian noise; the standard deviation follows from the
# variance that has to be added to reach the wanted jitter.
598 procedure set_jitter .newJitter .pulses .pulsesCC
600 if .pulses > 0 and .newJitter > 0
603 # Use CC to determine real jitter
607 .current_jitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
608 .current_abs_jitter = Get jitter (local, absolute): 0, 0, 0.0001, 0.03, 2
609 .current_mean_period = Get mean period: 0, 0, 0.0001, 0.03, 2
610 .current_stdev_period = Get stdev period: 0, 0, 0.0001, 0.03, 2
612 if .newJitter > .current_jitter
# var(Ti - Ti-1) = (absolute jitter)^2 * pi/2, for the current and for the wanted jitter
613 .current_var = .current_abs_jitter**2 * pi/2
614 .end_var = (.newJitter * .current_mean_period)**2 * pi/2
615 # The variance to add per boundary (total / (2*3))
616 .add_var = (.end_var - .current_var) / 6
617 .stdev_e = sqrt(.add_var)
619 # Keep the original pulses just in case the order of the pulses might change
621 .origPulses = Copy: "Original_Pulses"
622 .numPoints = Get number of points
625 # Change jitter by moving the ti according to
626 # t'i = ti - randomGauss(0, stdev(e'))
627 for .p from 1 to .numPoints
629 .t = Get time from index: .p
630 .new_t = .t - randomGauss(0, .stdev_e)
632 # Remove current point
634 .r = Get nearest index: .t
# Jitter can only be increased: ask the user to continue when the wanted value is not larger
642 pause New jitter: '.newJitter' must be larger than current jitter '.current_jitter:4'
645 # Calculate new jitter
647 .jitter_new = Get jitter (local): 0, 0, 0.0001, 0.03, 2
# Convert the measured fraction to a percentage
649 .current_jitter *= 100
656 # We cannot use the shimmer of a sentence, so we can only "add" shimmer
658 # .newShimmer is in %
659 # .sound: Source Sound
660 # .pulses: PointProcess
661 # .voicing: VUV TextGrid
662 # .newShimmer: New shimmer in %
# The shimmer is added by multiplying the sound with an AmplitudeTier that holds a
# random gain (.new_a / .a) for every period. The new Sound is stored in .new_sound.
663 procedure increase_shimmer .sound .pulses .voicing .newShimmer
668 # Create Amplitude Tier and get current shimmer
670 .duration = Get total duration
672 .current_amplitude = To AmplitudeTier (period): 0, 0, 0.0001, 0.03, 2
673 .current_shimmer = Get shimmer (local): 0.0001, 0.03, 2
674 select .current_amplitude
675 .numPoints = Get number of points
# The TableOfReal gives access to the time (column 1) and amplitude (column 2) of every point
676 .ampreal = Down to TableOfReal
679 for .p from 1 to .numPoints
681 .tmp = Get value: .p, 2
687 .meanAmp = .sumamp / .n
689 # Sd must be multiplied with the amplitude
# Variance to add: the difference between wanted and current shimmer when the wanted
# shimmer is larger, otherwise the variance of the wanted shimmer itself
690 if .newShimmer > .current_shimmer
691 .new_var = (.newShimmer**2 - .current_shimmer**2) * .meanAmp**2 * pi / 2
693 .new_var = .newShimmer**2 * .meanAmp**2 * pi / 2
696 .new_sd = sqrt(.new_var / 2)
701 .new_amplitude = Create AmplitudeTier: "New_Amplitude", 0, .duration
702 for .p from 1 to .numPoints
704 .t = Get value: .p, 1
705 .a = Get value: .p, 2
710 .new_a = .a - randomGauss(0, .new_sd)
# Store the gain relative to the original amplitude, not the amplitude itself
716 select .new_amplitude
717 Add point: .t, .new_a / .a
723 # Set unvoiced parts to 1
724 select .new_amplitude
728 .numIntervals = Get number of intervals: 1
729 for .i from 1 to .numIntervals
731 .t = Get end point: 1, .i
732 select .new_amplitude
737 plus .current_amplitude
740 # Overlay shimmer over sound
743 .new_sound = Multiply
744 Rename: "NewSound_Shimmer"
748 .shimmer_new = Get shimmer (local): 0, 0, 0.0001, 0.02, 1.3, 1.6
# Convert the measured fraction to a percentage
750 .current_shimmer *= 100
753 select .new_amplitude
758 .new_sound = Copy: "NewSound_Shimmer"
764 # Make a copy of the source to the target matching the pulses in source and target
765 # Copies fragments around pulses in sourcePulses under the direction of the
766 # corresponding pulses in targetPulses using the Overlap&Add method (Gaussian window)
768 # Ignores voiceless parts, ie, intervals between pulses > .maxInt
769 # For voices, .maxInt should be ~0.02 (F0 > 50Hz). For other sounds, e.g., bubbles, this
770 # should be increased to fit the whole sound between pulses.
772 # Midpoint between the pulses, periods add up to a factor of ~1.04.
773 # At the pulses themselves, it adds up to ~1.12 (summed left and right)
# .sourceSound: id of the Sound the fragments are copied from
# .sourcePulses: id of the PointProcess with the pulses of the source
# .targetSound: id of the Sound that receives the fragments
# .targetPulses: id of the PointProcess with the new pulse positions (<= 0: use .sourcePulses)
# .maxInt: maximum interval between pulses
775 procedure overlap_add .sourceSound .sourcePulses .targetSound .targetPulses .maxInt
776 # Create empty .targetSound if .targetSound does not exist
779 .duration = Get total duration
780 .samplingFrequency = Get sampling frequency
781 .targetSound = Create Sound from formula: "Target Sound", 1, 0, .duration, .samplingFrequency, "0"
783 # Default, just copy the source pulses
784 if .targetPulses <= 0
785 .targetPulses = .sourcePulses
788 # Maximum interval between pulses (maximum pitch period)
# Object names with spaces replaced by underscores, so they can be used inside a Formula
794 .sourceName$ = replace_regex$(selected$(), " ", "_", 0)
796 .targetName$ = replace_regex$(selected$(), " ", "_", 0)
798 # Iterate over target pulses
800 .numPulses = Get number of points
# Period lengths just left and right of the target pulse
804 .tTarget = Get time from index: .p
805 .pLeft = Get interval: .tTarget - 0.001
806 .pRight = Get interval: .tTarget + 0.001
# Nearest pulse to the target time and the period lengths around it
809 .q = Get nearest index: .tTarget
810 .tSource = Get time from index: .q
811 .qLeft = Get interval: .tSource - 0.001
812 .qRight = Get interval: .tSource + 0.001
813 # Gaussian window parameters (FWHM Left and Right)
814 # FWHM = 2*sqrt(2*ln(2)) * c
815 .cL = min(.pLeft,.qLeft)/(2*sqrt(2*ln(2)))
816 .cR = min(.pRight,.qRight)/(2*sqrt(2*ln(2)))
# Skip pulses without a defined period or with a period longer than .maxInt on either side
817 if not( .cL = undefined or .cL > .maxInt/(2*sqrt(2*ln(2))) or .cR = undefined or .cR > .maxInt/(2*sqrt(2*ln(2))) )
# Add the source fragment, time shifted from .tSource to .tTarget, with a Gaussian window
# of width .cL left of the pulse and .cR right of it
820 Formula (part): .tTarget-.margin, .tTarget+.margin, 1, 1, "if x<.tTarget then self + '.sourceName$'((x - .tTarget) + .tSource)*exp(-1*(((x - .tTarget)/.cL)^2)/2) else self + '.sourceName$'((x - .tTarget) + .tSource)*exp(-1*(((x - .tTarget)/.cR)^2)/2) endif"
# Move the periods of the voiced parts to new pulse positions and add shimmer.
# .sourceAudio: id of the source Sound
# .sourcePulses: id of the PointProcess with the pulses of the source
# .vuvTextGrid: id of the VUV TextGrid
# .targetPulses: id of the PointProcess with the new pulse positions
# .newShimmer: new shimmer, passed on to increase_shimmer
# The result is scaled to global.setIntensity.
828 procedure apply_overlap_add .sourceAudio .sourcePulses .vuvTextGrid .targetPulses .newShimmer
829 # Use overlap-add to add new source intervals
830 # Copy only voiced pulses
831 call set_VUV_to_zero .targetPulses .vuvTextGrid U
832 # Create a copy of the old source with voiced parts zeroed
834 .testSource = Copy: "OaAsound"
835 call set_VUV_to_zero .testSource .vuvTextGrid V
837 # Copy the voiced parts of the new source to the zeroed voiced parts of the old source
# 0.02 is the maximum interval between pulses (see overlap_add)
838 call overlap_add .sourceAudio .sourcePulses .testSource .targetPulses 0.02
839 call increase_shimmer .testSource .targetPulses .vuvTextGrid .newShimmer
840 .newSound = selected("Sound")
841 Scale intensity: global.setIntensity
850 # Set intervals matching a label text to Zero or remove the pulses
851 # Works on Sound and Pulses
# .sound: id of the Sound or PointProcess to change
# .vuvTextGrid: id of the TextGrid whose tier 1 supplies the intervals
# .zeroLabel$: label of the intervals to clear
852 procedure set_VUV_to_zero .sound .vuvTextGrid .zeroLabel$
# selected$() starts with the object type; keep only that first word
854 .objectType$ = selected$()
855 .objectType$ = extractWord$ (.objectType$, "")
857 .numIntervals = Get number of intervals: 1
858 # Zero out VU intervals
859 for .i to .numIntervals
861 .vuvLabel$ = Get label of interval: 1, .i
862 .start = Get starting point: 1, .i
863 .end = Get end point: 1, .i
864 if .vuvLabel$ = .zeroLabel$
# Sounds are silenced, PointProcesses lose their points in the interval
866 if .objectType$ = "Sound"
867 Set part to zero: .start, .end, "at nearest zero crossing"
868 elsif .objectType$ = "PointProcess"
869 Remove points between: .start, .end
871 printline Unsupported object type for set_VUV_to_zero
880 # LPC analysis-resynthesis with source. Ignore voice/voiceless
# .sound: id of the Sound that supplies the LPC filter, intensity contour and target intensity
# .pulses: id of a PointProcess
# .fraction: fraction of time bubble sounds are the source
# .snr: signal to noise ratio (dB); the bubble sound is scaled to .targetIntensity - .snr
# .sourceAudioName$: file name of the recording used as the new source
# The resulting Sound is stored in .targetSound.
883 procedure synth_with_source_signal .sound .pulses .fraction .snr .sourceAudioName$
886 .duration = Get total duration
887 .samplingFrequency = Get sampling frequency
894 .targetIntensity = Get intensity (dB)
895 .targetDuration = Get total duration
# LPC analysis on a copy resampled to 10 kHz
896 .downsampled = Resample: 10000, 50
897 .lpc = To LPC (autocorrelation): 10, 0.025, 0.005, 50
# The inverse filtered signal supplies the intensity contour of the source
899 .source = Filter (inverse)
900 .sourceInt = To Intensity: 70, 0, "yes"
901 .sourceIntTier = To IntensityTier (peaks)
907 # Create the target file
908 .masterSourceSound = Read from file: .sourceAudioName$
909 .masterDuration = Get total duration
# Lengthen the source recording by concatenation until it is at least twice as long as the target
910 while .masterDuration < 2*.duration
911 .tmpA = .masterSourceSound
914 .masterSourceSound = Concatenate
915 selectObject: .tmpA, .tmpB
917 select .masterSourceSound
918 .masterDuration = Get total duration
920 # Get a random start point
# The master is at least 2*.duration long, so .startPoint+.duration stays inside it
921 .startPoint = randomUniform (0, .duration)
922 select .masterSourceSound
923 .targetSound = Extract part: .startPoint, .startPoint+.duration, "rectangular", 1, "no"
925 select .masterSourceSound
930 .scaledBubbleSource = Multiply: "yes"
931 Scale intensity: .targetIntensity - .snr
933 selectObject: .targetSound, .sourceIntTier
936 select .scaledBubbleSource
938 .filteredBubbleSource = Filter: "no"
939 Rename: "TargetBubbleSound"
# Back to the sampling frequency of the input Sound
940 .targetSound = Resample: .samplingFrequency, 50
942 selectObject: .scaledBubbleSource, .lpc, .filteredBubbleSource
946 Scale intensity: .targetIntensity - .snr
# All-zero Sound (formula "0") with the duration and sampling frequency of the input Sound
948 .targetSound = Create Sound: "Bubbles", 0, .duration, .samplingFrequency, "0"
956 # Select a random pulse in the bubbles and add it to a random pulse in the target
958 # Creates a sound with only the bubbles
# .sound: id of the Sound that supplies the LPC filter, intensity contour and target intensity
# .rate: number of bubbles per second (.targetNumBubbles = .rate * duration)
# .snr: signal to noise ratio (dB) used to scale the bubble sound
# .vuvTextGrid: id of the VUV TextGrid; intervals labelled U are set to zero
# .bubblesAudioName$: file name of the bubbles recording; a TextGrid with the same base name
#                     supplies the "sounding" intervals
# Ends with the bubbles Sound (.additiveBubblesSound) selected.
960 procedure add_bubbles .sound .rate .snr .vuvTextGrid .bubblesAudioName$
963 .targetIntensity = Get intensity (dB)
964 .targetDuration = Get total duration
965 .tagetSamplingFrequency = Get sampling frequency
966 .targetNumBubbles = .rate * .targetDuration
# LPC analysis on a copy resampled to 10 kHz
967 .downsampled = Resample: 10000, 50
968 .lpc = To LPC (autocorrelation): 10, 0.025, 0.005, 50
970 .source = Filter (inverse)
971 .sourceInt = To Intensity: 70, 0, "yes"
972 .sourceIntTier = To IntensityTier (peaks)
# All-zero Sound (formula "0") with the duration and sampling frequency of the input Sound
979 .additiveBubblesSound = Create Sound: "Bubbles", 0, .targetDuration, .tagetSamplingFrequency, "0"
983 # Create an empty sound to receive the bubbles
984 .bubblesAudio = Read from file: .bubblesAudioName$
# The TextGrid has the name of the audio file with the extension replaced by .TextGrid
985 .bubblesTextGridName$ = replace_regex$(.bubblesAudioName$, "\.[a-z0-9]{2,}$", ".TextGrid", 0)
986 .bubblesTextGrid = Read from file: .bubblesTextGridName$
# Object name with spaces replaced by underscores, so it can be used inside a Formula
988 .sourceName$ = replace_regex$(selected$(), " ", "_", 0)
989 .bubblesSamplingFrequency = Get sampling frequency
990 .bubblesIntensity = Get intensity (dB)
991 .bubbleSound = Create Sound: "Bubbles", 0, .targetDuration, .bubblesSamplingFrequency, "0"
993 # Fill the new Bubbles
994 select .bubblesTextGrid
995 .numIntervals = Get number of intervals: 1
# Draw random intervals; only those labelled "sounding" are used
997 while .bubblesFound < .targetNumBubbles
998 .i = randomInteger(1, .numIntervals)
999 select .bubblesTextGrid
1000 .label$ = Get label of interval: 1, .i
1001 if .label$ = "sounding"
1003 .startPoint = Get starting point: 1, .i
1004 .endPoint = Get end point: 1, .i
1005 .midPoint = (.startPoint + .endPoint)/2
1006 .bubbleDuration = .endPoint - .startPoint
1008 # Get random insertion point
1009 .t = randomUniform (0.001, .targetDuration-0.001)
1010 .targetStart = .t - .bubbleDuration/2
1011 .targetEnd = .t + .bubbleDuration/2
# Add the bubble, centred on .t, by reading the recording around .midPoint
1013 Formula (part): .targetStart, .targetEnd, 1, 1, "self + '.sourceName$'((x - .t) + .midPoint)"
1017 # Convert selected bubbles to scaled source
1019 .resampledBubbleSound = Resample: .tagetSamplingFrequency, 50
1021 .scaledBubbleSource = Multiply: "yes"
1022 call set_VUV_to_zero .scaledBubbleSource .vuvTextGrid U
1024 # The measured Intensity of the few selected bubbles can be too low. Correct for scaling
1025 select .scaledBubbleSource
1026 .bubbleSoundIntensity = Get intensity (dB)
1027 .attenuation = .bubblesIntensity - .bubbleSoundIntensity
1028 if .attenuation = undefined
1032 # Scale bubble sounds
1033 select .scaledBubbleSource
1034 Scale intensity: .targetIntensity - .snr - .attenuation
1036 select .scaledBubbleSource
1038 .filteredBubbles = Filter: "no"
1039 Rename: "FilteredBubbleNoise"
1040 .additiveBubblesSound = Resample: .tagetSamplingFrequency, 50
1043 select .resampledBubbleSound
1044 plus .scaledBubbleSource
1045 plus .filteredBubbles
1049 plus .bubblesTextGrid
1055 select .additiveBubblesSound
# Add a single period from the source to the target with a Gaussian window.
# .sourceAudio: id of the Sound the period is copied from
# .sourcePulses: id of the PointProcess of the source
# .sourceI: index of the pulse in .sourcePulses to copy
# .targetAudio: id of the Sound that receives the period
# .targetPulses: id of the PointProcess of the target
# .targetI: index of the pulse in .targetPulses where the period is added
1058 procedure add_single_bubble .sourceAudio .sourcePulses .sourceI .targetAudio .targetPulses .targetI
# Object names with spaces replaced by underscores, so they can be used inside a Formula
1061 .sourceName$ = replace_regex$(selected$(), " ", "_", 0)
1063 .targetName$ = replace_regex$(selected$(), " ", "_", 0)
1066 select .targetPulses
1067 .tTarget = Get time from index: .targetI
1068 .pLeft = Get interval: .tTarget - 0.001
1069 .pRight = Get interval: .tTarget + 0.001
1072 select .sourcePulses
1073 .tSource = Get time from index: .sourceI
1074 .qLeft = Get interval: .tSource - 0.001
1075 .qRight = Get interval: .tSource + 0.001
1077 # Gaussian window parameters (FWHM Left and Right)
1078 # FWHM = 2*sqrt(2*ln(2)) * c
1079 .c = (.qLeft+.qRight)/(2*sqrt(2*ln(2)))
# Only the single width .c is computed in this procedure (there is no .cL or .cR here),
# so the guard tests .c. It is undefined when either source interval is undefined.
1080 if not( .c = undefined )
# Add the source period, time shifted from .tSource to .tTarget, under the Gaussian window
1083 Formula (part): .tTarget-.margin, .tTarget+.margin, 1, 1, "self + '.sourceName$'((x - .tTarget) + .tSource)*exp(-1*(((x - .tTarget)/.c)^2)/2)"
1087 ###########################################################################
1089 # Praat Script Syllable Nuclei #
1090 # Copyright (C) 2008 Nivja de Jong and Ton Wempe #
1092 # This program is free software: you can redistribute it and/or modify #
1093 # it under the terms of the GNU General Public License as published by #
1094 # the Free Software Foundation, either version 3 of the License, or #
1095 # (at your option) any later version. #
1097 # This program is distributed in the hope that it will be useful, #
1098 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
1099 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
1100 # GNU General Public License for more details. #
1102 # You should have received a copy of the GNU General Public License #
1103 # along with this program. If not, see http://www.gnu.org/licenses/ #
1105 ###########################################################################
1107 # modified 2010.09.17 by Hugo Quené, Ingrid Persoon, & Nivja de Jong
1108 # Overview of changes:
1109 # + change threshold-calculator: rather than using median, use the almost maximum
1110 # minus 25dB. (25 dB is in line with the standard setting to detect silence
1111 # in the "To TextGrid (silences)" function.
1112 # Almost maximum (.99 quantile) is used rather than maximum to avoid using
1113 # irrelevant non-speech sound-bursts.
1114 # + add silence-information to calculate articulation rate and ASD (average syllable
1116 # NB: speech rate = number of syllables / total time
1117 # articulation rate = number of syllables / phonation time
1118 # + remove max number of syllable nuclei
1119 # + refer to objects by unique identifier, not by name
1120 # + keep track of all created intermediate objects, select these explicitly,
1122 # + provide summary output in Info window
1123 # + do not save TextGrid-file but leave it in Object-window for inspection
1124 # (if requested in startup-form)
1125 # + allow Sound to have starting time different from zero
1126 # for Sound objects created with Extract (preserve times)
1127 # + programming of checking loop for mindip adjusted
1128 # in the orig version, precedingtime was not modified if the peak was rejected !!
1129 # var precedingtime and precedingint renamed to .currenttime and .currentint
1131 # + bug fixed concerning summing total pause, feb 28th 2011
1133 # modified 2014.10.24 by Rob van Son
1134 # Overview of changes:
1135 # + Converted to a function form. Can be called as -
1136 # call syllable_nuclei -25 2 0.3 1 .soundFile
1137 # where .soundFile is the ID of an open soundfile
1138 # + Added noprogress and cleaned up object id assignment
1140 ###########################################################################
1142 # counts syllables of sound utterances
1143 # NB unstressed syllables are sometimes overlooked
1144 # NB filter sounds that are quite noisy beforehand
1145 # NB use Silence threshold (dB) = -25 (or -20?)
1146 # NB use Minimum .dip between peaks (dB) = between 2-4 (you can first try;
1147 # For clean and filtered: 4)
1148 # syllable_nuclei.soundname$ - Name of sound object
1149 # syllable_nuclei.voicedcount - Count of voiced segments
1150 # syllable_nuclei.npause - Count of pauses
1151 # syllable_nuclei.originaldur - Original duration
1152 # syllable_nuclei.speakingtot - Duration of speech
1153 # syllable_nuclei.speakingrate - Syllables per second of total duration (gross)
1154 # syllable_nuclei.articulationrate - Syllables per speaking time
1155 # syllable_nuclei.asd - Average syllable duration
1158 # real .silence_threshold -25 (dB)
1159 # real .minimum_dip_between_peaks 2 (dB)
1160 # real .minimum_pause_duration 0.3 (s)
1161 # boolean .keep_Soundfiles_and_Textgrids 1
1165 # call syllable_nuclei -25 2 0.3 1 .originalRecording
1167 procedure syllable_nuclei .silence_threshold .minimum_dip_between_peaks .minimum_pause_duration .keep_Soundfiles_and_Textgrids .soundid
1171 .soundname$ = selected$("Sound")
1174 .silencedb = .silence_threshold
1175 .mindip = .minimum_dip_between_peaks
1176 .showtext = .keep_Soundfiles_and_Textgrids
1177 .minpause = .minimum_pause_duration
1179 .originaldur = Get total duration
1180 # allow non-zero starting time
1181 .bt = Get starting time
1183 # Use intensity to get .threshold
1184 .intid = noprogress To Intensity... 50 0 yes
1185 .start = Get time from frame number... 1
1186 .nframes = Get number of frames
1187 .end = Get time from frame number... '.nframes'
1189 # estimate noise floor
1191 .minint = Get minimum... 0 0 Parabolic
1192 # estimate noise max
1193 .maxint = Get maximum... 0 0 Parabolic
1194 #get .99 quantile to get maximum (without influence of non-speech sound bursts)
1195 .max99int = Get quantile... 0 0 0.99
1197 # estimate Intensity .threshold
1198 .threshold = .max99int + .silencedb
1199 .threshold2 = .maxint - .max99int
1200 .threshold3 = .silencedb - .threshold2
1201 if .threshold < .minint
1202 .threshold = .minint
1205 # get pauses (silences) and speakingtime
1207 .textgridid = noprogress To TextGrid (silences)... 80 0 '.threshold3' '.minpause' 0.1 silent sounding
1208 .silencetierid = Extract tier... 1
1209 .silencetableid = Down to TableOfReal... sounding
1210 nsounding = Get number of rows
1211 .npauses = 'nsounding'
1213 for ipause from 1 to .npauses
1214 beginsound = Get value... 'ipause' 1
1215 endsound = Get value... 'ipause' 2
1216 speakingdur = 'endsound' - 'beginsound'
1217 .speakingtot = 'speakingdur' + '.speakingtot'
1222 .matid = selected("Matrix")
1223 # Convert intensity to sound
1224 .sndintid = noprogress To Sound (slice)... 1
1226 # use total duration, not .end time, to find out duration of .intdur
1227 # in order to allow nonzero starting times.
1228 .intdur = Get total duration
1229 intmax = Get maximum... 0 0 Parabolic
1231 # estimate peak positions (all peaks)
1232 .ppid = noprogress To PointProcess (extrema)... Left yes no Sinc70
1234 numpeaks = Get number of points
1236 # fill array with time points
1237 for .i from 1 to numpeaks
1238 t'.i' = Get time from index... '.i'
1242 # fill array with intensity values
1245 for .i from 1 to numpeaks
1246 value = Get value at time... t'.i' Cubic
1247 if value > .threshold
1249 int'.peakcount' = value
1250 .timepeaks'.peakcount' = t'.i'
1255 # fill array with valid peaks: only intensity values if preceding
1256 # .dip in intensity is greater than .mindip
1259 .currenttime = .timepeaks1
1262 for .p to .peakcount-1
1264 .followingtime = .timepeaks'.following'
1265 .dip = Get minimum... '.currenttime' '.followingtime' None
1266 .diffint = abs(.currentint - .dip)
1268 if .diffint > .mindip
1269 .validpeakcount += 1
1270 validtime'.validpeakcount' = .timepeaks'.p'
1272 .currenttime = .timepeaks'.following'
1273 .currentint = Get value at time... .timepeaks'.following' Cubic
1277 # Look for only voiced parts
1279 .pitchid = noprogress To Pitch (ac)... 0.02 30 4 no 0.03 0.25 0.01 0.35 0.25 450
1282 for .i from 1 to .validpeakcount
1283 .querytime = validtime'.i'
1285 select '.textgridid'
1286 .whichinterval = Get interval at time... 1 '.querytime'
1287 .whichlabel$ = Get label of interval... 1 '.whichinterval'
1290 value = Get value at time... '.querytime' Hertz Linear
1292 if value <> undefined
1293 if .whichlabel$ = "sounding"
1294 .voicedcount = .voicedcount + 1
1295 voicedpeak'.voicedcount' = validtime'.i'
1300 # calculate time correction due to shift in time for Sound object versus
1302 .timecorrection = .originaldur/.intdur
1304 # Insert voiced peaks in TextGrid
1306 select '.textgridid'
1307 Insert point tier... 1 syllables
1309 for .i from 1 to .voicedcount
1310 position = voicedpeak'.i' * .timecorrection
1311 Insert point... 1 position '.i'
1315 # clean up before next sound file is opened
1322 plus .silencetableid
1331 # summarize results in Info window
1332 .speakingrate = '.voicedcount'/'.originaldur'
1333 .articulationrate = '.voicedcount'/'.speakingtot'
1334 .npause = '.npauses'-1
1335 .asd = '.speakingtot'/'.voicedcount'