5 sentence Recorded_audio ookhetweer.wav
6 real Pitch 70 (= Hz, mean)
7 real Pitch_SD 15 (= % of mean)
8 real Duration 1.3 (= mult. factor)
12 positive Voicing_floor_(dB) 15 (= below maximum)
16 # Increase the Jitter and Shimmer of a speech recording to the
17 # number given. Cannot reduce Jitter or Shimmer.
18 # Note that Jitter and Shimmer are ill-defined in anything but
21 # Uses the To PointProcess (periodic, cc) to calculate the jitter
22 # and To PointProcess (periodic, peaks): 60, 300, "yes", "yes"
23 # to change the timing of the periods.
25 # Periods are moved with Overlap-and-Add
27 # Shimmer is adapted using additive noise over an intensity tier and
28 # adapting each period individually. Periods are determined with the
29 # To PointProcess (periodic, peaks) pulses.
31 ########################################################################
33 # Copyright (C) 2016-2017 NKI-AVL, R. J. J. H. van Son
36 # This program is free software: you can redistribute it and/or modify
37 # it under the terms of the GNU General Public License as published by
38 # the Free Software Foundation, either version 3 of the License, or
39 # (at your option) any later version.
41 # This program is distributed in the hope that it will be useful,
42 # but WITHOUT ANY WARRANTY; without even the implied warranty of
43 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 # GNU General Public License for more details.
46 # You should have received a copy of the GNU General Public License
47 # along with this program. If not, see <http://www.gnu.org/licenses/>.
49 # Full license text is available at:
50 # http://www.gnu.org/licenses/gpl-3.0.html
52 ########################################################################
54 # Recorded_audio: Filename of recording. Uses selected sound object if present.
55 # Jitter: New Jitter in %
56 # Shimmer: New Shimmer in %
57 # Voicing_floor: Stop voicing below this intensity relative to peak (dB)
59 # Print debugging information
# Choose the input: a Sound that is already selected takes precedence,
# otherwise the file named in Recorded_audio is read if it is readable.
62 if numberOfSelected("Sound") > 0
63 .recordedSound = selected("Sound")
64 elsif recorded_audio$ <> "" and fileReadable(recorded_audio$)
65 .recordedSound = Read from file: recorded_audio$
66 Rename: "RecordedSpeech"
# Voicing_floor is entered as a positive number of dB below the maximum,
# so the threshold that is passed on is its negative.
69 .thresshold = -voicing_floor
# Remember the intensity; test_overlap_add scales its result to this value.
73 global.setIntensity = Get intensity (dB)
# Run the conversion with the values entered in the form.
75 call convert_speechOverlapAndAdd .recordedSound .thresshold jitter shimmer pitch pitch_SD duration hNR
77 # Definitions of functions
# Convert a recording: change pitch and duration, change jitter and shimmer
# with overlap-and-add, and finally mix in additive noise.
# .recordedSound: id of the source Sound
# .thresshold: threshold (dB) passed on to extractVoicingParameters
# .jitter, .shimmer: passed on to set_jitter and test_overlap_add
# .pitch, .pitch_SD, .durationFactor: passed on to change_pitch_duration
# .newHNR: passed on to create_additive_noise
# The called procedures hand back their results through the selection.
80 procedure convert_speechOverlapAndAdd .recordedSound .thresshold .jitter .shimmer .pitch .pitch_SD .durationFactor .newHNR
# Step 1: pitch and duration
81 call change_pitch_duration .recordedSound .pitch .pitch_SD .durationFactor
82 .newPitchSound = selected("Sound")
# Step 2: voicing information (TextGrid, pulses, intensity) of the new Sound
84 call extractVoicingParameters .newPitchSound .thresshold
85 .recordedTextGrid = selected("TextGrid")
86 .recordedPulses = selected("PointProcess")
87 .recordedInt = selected("Intensity")
# Peak based pulses, these are the ones used to move the periods
90 .recordedPulsesPeaks = To PointProcess (periodic, peaks): 60, 300, "yes", "yes"
# Step 3: noise that is mixed in at the end
92 call create_additive_noise .newPitchSound .newHNR
93 .additiveNoise = selected("Sound")
95 # Change Jitter, use CC to determine jitter and Peaks to change the periods
96 selectObject: .recordedPulsesPeaks
97 .newPointProcess = Copy: "New_Pulses"
98 call set_jitter .jitter .newPointProcess .recordedPulses
99 call test_overlap_add .newPitchSound .recordedPulsesPeaks .recordedTextGrid .newPointProcess .shimmer
100 .newSound = selected("Sound")
# Measure jitter and shimmer before and after the conversion and report them
105 selectObject: .recordedPulses
106 .old_jitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
107 selectObject: .newPointProcess
108 .newPPjitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
109 selectObject: .recordedSound
111 .old_amplitude = To AmplitudeTier (period): 0, 0, 0.0001, 0.03, 2
112 .old_shimmer = Get shimmer (local): 0.0001, 0.03, 2
115 selectObject: .newSound
116 .pointP = To PointProcess (periodic, cc): 60, 300
117 .new_jitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
119 selectObject: .newSound
121 .new_amplitude = To AmplitudeTier (period): 0, 0, 0.0001, 0.03, 2
122 .new_shimmer = Get shimmer (local): 0.0001, 0.03, 2
124 appendInfoLine: "New Jitter: '.new_jitter:1%' ('.old_jitter:1%' ~> '.newPPjitter:1%')"
125 appendInfoLine: "New Shimmer: '.new_shimmer:1%' ('.old_shimmer:1%')"
127 selectObject: .old_amplitude, .pointP, .new_amplitude
131 # Add noise to result
132 call add_sounds .newSound .additiveNoise
133 .result = selected("Sound")
137 selectObject: .newPitchSound, .recordedTextGrid, .recordedPulses, .recordedInt, .newPointProcess, .recordedPulsesPeaks, .newSound, .additiveNoise
# Leave the result selected for the caller
140 selectObject: .result
# Change the mean pitch, the pitch spread and the duration of a Sound
# by way of a Manipulation object.
# .sound: id of the Sound
# .pitch: new mean pitch (Hz)
# .pitchFraction: new pitch standard deviation as a percentage of .pitch
# .durationFactor: multiplication factor for the duration
# The caller takes the new Sound from the selection.
143 procedure change_pitch_duration .sound .pitch .pitchFraction .durationFactor
145 .duration = Get total duration
147 .manipulation = To Manipulation: 0.01, 70, 300
148 .pitchTier = Extract pitch tier
149 .currentPitch = Get mean (points): 0, 0
# Convert the percentage into an absolute standard deviation
150 .pitch_SD = .pitchFraction / 100 * .pitch
153 .durationTier = Extract duration tier
156 if .durationFactor > 0
157 .numPoints = Get number of points
161 Formula: "self*'.durationFactor'"
164 Replace duration tier
# Scale all frequencies so the mean moves from .currentPitch to .pitch
169 .factor = (.pitch / .currentPitch)
170 Multiply frequencies: 0, .duration, .factor
171 .currentSD = Get standard deviation (points): 0, 0
# Stretch the deviations around .pitch by the ratio of new to current SD
174 .factor = .pitch_SD / .currentSD
175 Formula: "'.pitch' + (self - '.pitch') * '.factor'"
184 if .currentPitch > 0 or .durationFactor > 0
186 .newSound = Get resynthesis (overlap-add)
189 .newSound = Copy: "New Sound"
# Derive voicing information from a Sound: a PointProcess (periodic, cc),
# a voiced/unvoiced TextGrid made from these pulses, and an Intensity object.
# The voiced/unvoiced boundaries are then compared with a silences TextGrid
# made from the Intensity.
# .recordedSound: id of the Sound
# .thresshold: silence threshold (dB) for To TextGrid (silences)
# The caller takes the TextGrid, PointProcess and Intensity from the selection.
199 procedure extractVoicingParameters .recordedSound .thresshold
200 # The lowest level of voiced sounds
201 select .recordedSound
202 .pointPcc = To PointProcess (periodic, cc): 60, 300
203 Rename: "RecordedPulses"
204 .textGrid = To TextGrid (vuv): 0.02, 0.01
205 Rename: "RecordedVoicing"
206 .numIntervals = Get number of intervals: 1
208 # Correct voicing boundaries
209 select .recordedSound
210 .intensity = To Intensity: 100, 0, "yes"
211 Rename: "RecordedIntensity"
212 .silences = To TextGrid (silences): .thresshold, 0.1, 0.05, "silent", "sounding"
# First pass: look up the start of each interval in the silences TextGrid
215 for .i to .numIntervals
217 .label$ = Get label of interval: 1, .i
219 .start = Get starting point: 1, .i
220 .end = Get end point: 1, .i
222 # Starting point of voiced interval
224 .s = Get interval at time: 1, .start
225 .sLabel$ = Get label of interval: 1, .s
226 if .sLabel$ = "silent"
227 .sStart = Get starting point: 1, .s
228 .sEnd = Get end point: 1, .s
231 Set interval text: 1, .i, "U"
232 # Shift boundaries: Insert&Remove
233 Insert boundary: 1, .sEnd
234 Set interval text: 1, .i+1, "V"
236 Set interval text: 1, .i, ""
237 Remove left boundary: 1, .i
240 # Low intensity, unvoiced
241 Set interval text: 1, .i, "U"
# Second pass: the same check, now for the end of each interval
248 for .i to .numIntervals
250 .label$ = Get label of interval: 1, .i
252 .start = Get starting point: 1, .i
253 .end = Get end point: 1, .i
# NOTE(review): this pass looks up .end, so the comment below refers to the end point
255 # Starting point of voiced interval
257 .s = Get interval at time: 1, .end
258 .sLabel$ = Get label of interval: 1, .s
259 if .sLabel$ = "silent"
260 .sStart = Get starting point: 1, .s
261 .sEnd = Get end point: 1, .s
264 Set interval text: 1, .i, "U"
265 # Shift boundaries: Insert&Remove
266 Insert boundary: 1, .sStart
267 Set interval text: 1, .i, "V"
269 Set interval text: 1, .i+1, ""
270 Remove right boundary: 1, .i+1
273 # Low intensity, unvoiced
274 Set interval text: 1, .i, "U"
# Create the noise Sound that is later added to .sound.
# .sound: id of the Sound
# .newHNR: subtracted from the intensity (dB) of the Sound to get the noise level
# Leaves the noise Sound selected. When no noise was created, a Sound with
# formula "0" and the same duration and sampling frequency is selected instead.
288 procedure create_additive_noise .sound .newHNR
290 .duration = Get total duration
291 .sampleFreq = Get sampling frequency
# -1 marks that no noise Sound has been created yet (tested below with <= 0)
293 .additiveNoiseSound = -1
295 # Determine noise level
297 .originalIntensity = Get intensity (dB)
298 .additiveNoise = .originalIntensity - .newHNR
# Resample to 10 kHz, inverse filter with an LPC and take the intensity peaks
302 .downsampled = Resample: 10000, 50
303 .lpc = To LPC (autocorrelation): 10, 0.025, 0.005, 50
305 .source = Filter (inverse)
306 .sourceInt = To Intensity: 70, 0, "yes"
307 .sourceIntTier = To IntensityTier (peaks)
309 # Create additive noise
310 if .additiveNoise > 0
311 .noise = Create Sound from formula: "WhiteNoise", 1, 0, .duration, .sampleFreq, "randomGauss(0,0.1)"
313 .filteredNoise = Filter: "no"
315 .additiveNoiseSoundTMP = Multiply: "yes"
# Set the noise to the level determined above and go back to the original sampling frequency
316 Scale intensity: .additiveNoise
317 .additiveNoiseSound = Resample: .sampleFreq, 50
319 selectObject: .noise, .filteredNoise, .additiveNoiseSoundTMP
323 selectObject: .downsampled, .lpc, .source, .sourceInt, .sourceIntTier
# Fall back to a Sound with formula "0" when no noise was created
328 if .additiveNoiseSound <= 0
329 .additiveNoiseSound = Create Sound from formula: "AdditiveNoise", 1, 0, .duration, .sampleFreq, "0"
332 select .additiveNoiseSound
# Add two Sounds by combining them to stereo and converting that to mono.
# .sound1, .sound2: ids of the Sounds to add
# The resulting Sound is left selected.
335 procedure add_sounds .sound1 .sound2
336 selectObject: .sound1, .sound2
337 .stereo = Combine to stereo
338 .addedSound = Convert to mono
343 selectObject: .addedSound
347 # Set Jitter to a specified number
349 # Ti = ti - ti-1 (interval i)
350 # Jitter (absolute) is Sum[ abs(Ti - Ti-1) ] / (N-1)
351 # Jitter = Jitter (absolute) / mean(Ti)
353 # For a Normal distribution
354 # E(|X|) = sqrt(2/pi) * stdev(X)
357 # E(Ti^2) = var(Ti) + E(Ti)^2
358 # E(Ti*Ti-1) = cov(Ti, Ti-1) + E(Ti)^2, with cov(Ti, Ti-1) = cor(Ti, Ti-1) * var(Ti)
359 # var(Ti - Ti-1) = E(Ti^2 - 2*Ti*Ti-1 + Ti-1^2)
360 # = 2*E(Ti^2) - 2*E(Ti*Ti-1)
361 # = 2*[ var(Ti) * (1 - cor(Ti, Ti-1)) ]
363 # Combine, assuming a Normal distribution:
364 # Jitter = E(|Ti - Ti-1|) / E(Ti)
365 # = sqrt(2/pi) * stdev(Ti - Ti-1) / mean(Ti)
366 # = sqrt(2/pi * var(Ti - Ti-1)) / mean(Ti)
367 # = sqrt[ 4/pi * ( var(Ti) * (1 - cor(Ti, Ti-1)) ) ] / mean(Ti)
369 # Change Ti -> T'i; Jitter -> a*Jitter while keeping mean(Ti) = mean(T'i) constant
370 # ei = (ti + ti-2)/2 - ti-1
371 # Jitter' = a * Jitter
372 # = a * sqrt[ 2/pi * var(Ti - Ti-1) ] / mean(Ti)
374 # => var(T'i - T'i-1) = a^2 * var(Ti - Ti-1)
375 # = a^2 * E[ (Ti - Ti-1)^2 ]
376 # = a^2 * E[ (ti - ti-1 - ti-1 + ti-2)^2 ]
377 # = a^2 * 2 * E[ ((ti + ti-2)/2 - ti-1)^2 ]
378 # = a^2 * 2 * E[ ei^2 ]
379 # = 2 * E[ (a*ei)^2 ]
382 # Generalizing, var(T'i - T'i-1) = 2*(var(ti-1) + var(ti) + var(ti+1))
383 # To increase Jitter -> Jitter'
384 # 1) Determine var(Ti - Ti-1) = (Jitter * mean(Ti))^2 * pi / 2
385 # 2) Calculate var(T'i - T'i-1) = (Jitter' * mean(T'i))^2 * pi / 2
386 # 3) Determine var to add:
387 # add_var(Ti - Ti-1) = var(T'i - T'i-1) - var(Ti - Ti-1)
388 # 4) Var of Noise to add per ti: add_var(ti) = add_var(Ti - Ti-1)/(2*3)
389 # 5) Sd of Noise to add per ti: add_sd(ti) = sqrt(add_var(ti))
392 # Converts .pulses into pulses with new Jitter
# .newJitter: target jitter, compared with the value of Get jitter (local)
# .pulses: id of the PointProcess whose points are moved
# .pulsesCC: id of a PointProcess; presumably the one the current jitter is
#            measured on (the caller passes the cc pulses) - TODO confirm
393 procedure set_jitter .newJitter .pulses .pulsesCC
395 if .pulses > 0 and .newJitter > 0
398 # Use CC to determine real jitter
402 .current_jitter = Get jitter (local): 0, 0, 0.0001, 0.03, 2
403 .current_abs_jitter = Get jitter (local, absolute): 0, 0, 0.0001, 0.03, 2
404 .current_mean_period = Get mean period: 0, 0, 0.0001, 0.03, 2
405 .current_stdev_period = Get stdev period: 0, 0, 0.0001, 0.03, 2
# Jitter can only be increased, see steps 1) to 5) in the derivation above
407 if .newJitter > .current_jitter
408 .current_var = .current_abs_jitter**2 * pi/2
409 .end_var = (.newJitter * .current_mean_period)**2 * pi/2
410 # The variance to add per boundary (total / (2*3))
411 .add_var = (.end_var - .current_var) / 6
412 .stdev_e = sqrt(.add_var)
414 # Keep the original pulses just is case the order of the pulses might change
416 .origPulses = Copy: "Original_Pulses"
417 .numPoints = Get number of points
420 # Change jitter by moving the ti according to
421 # t'i = ti - randomGauss(0, stdev(e'))
422 for .p from 1 to .numPoints
424 .t = Get time from index: .p
425 .new_t = .t - randomGauss(0, .stdev_e)
427 # Remove current point
429 .r = Get nearest index: .t
# Tell the user that the requested jitter cannot be reached
437 pause New jitter: '.newJitter' must be larger than current jitter '.current_jitter:4'
440 # Calculate new jitter
442 .jitter_new = Get jitter (local): 0, 0, 0.0001, 0.03, 2
# Scale by 100, presumably to express the value in % - TODO confirm
444 .current_jitter *= 100
451 # We cannot use the shimmer of a sentence, so we can only "add" shimmer
453 # .new_shimmer is in %
454 # .sound: Source Sound
455 # .pulses: PointProcess
456 # .voicing: VUV TextGrid
457 # .new_shimmer: New shimmer in %
# The resulting Sound is named "NewSound_Shimmer"; the caller takes it
# from the selection.
458 procedure increase_shimmer .sound .pulses .voicing .newShimmer
463 # Create Amplitude Tier and get current shimmer
465 .duration = Get total duration
467 .current_amplitude = To AmplitudeTier (period): 0, 0, 0.0001, 0.03, 2
468 .current_shimmer = Get shimmer (local): 0.0001, 0.03, 2
469 select .current_amplitude
470 .numPoints = Get number of points
# The table is read below: column 1 as time, column 2 as amplitude
471 .ampreal = Down to TableOfReal
474 for .p from 1 to .numPoints
476 .tmp = Get value: .p, 2
482 .meanAmp = .sumamp / .n
484 # Sd must be multiplied with the amplitude
# Variance to add: the difference of the squared shimmers when the new
# shimmer is larger, in the other assignment the full new shimmer is used
485 if .newShimmer > .current_shimmer
486 .new_var = (.newShimmer**2 - .current_shimmer**2) * .meanAmp**2 * pi / 2
488 .new_var = .newShimmer**2 * .meanAmp**2 * pi / 2
491 .new_sd = sqrt(.new_var / 2)
# Build a tier with, per period, the ratio of the perturbed to the original amplitude
496 .new_amplitude = Create AmplitudeTier: "New_Amplitude", 0, .duration
497 for .p from 1 to .numPoints
499 .t = Get value: .p, 1
500 .a = Get value: .p, 2
505 .new_a = .a - randomGauss(0, .new_sd)
511 select .new_amplitude
512 Add point: .t, .new_a / .a
518 # Set unvoiced parts to 1
519 select .new_amplitude
523 .numIntervals = Get number of intervals: 1
524 for .i from 1 to .numIntervals
526 .t = Get end point: 1, .i
527 select .new_amplitude
532 plus .current_amplitude
535 # Overlay shimmer over sound
538 .new_sound = Multiply
539 Rename: "NewSound_Shimmer"
543 .shimmer_new = Get shimmer (local): 0, 0, 0.0001, 0.02, 1.3, 1.6
# Scale by 100, presumably to express the value in % - TODO confirm
545 .current_shimmer *= 100
548 select .new_amplitude
553 .new_sound = Copy: "NewSound_Shimmer"
559 # Make a copy of the source to the target matching the pulses in source and target
560 # Copies fragments around pulses in sourcePulses under the direction of the
561 # corresponding pulses in targetPulses using the Overlap&Add method (Gaussian window)
563 # Ignores voiceless parts, ie, intervals between pulses > .maxInt
564 # For voices, .maxInt should be ~0.02 (F0 > 50Hz). For other sounds, e.g., bubbles, this
565 # should be increased to fit the whole sound between pulses.
567 # Midpoint between the pulses, periods add up to a factor of ~1.04.
568 # At the pulses themselves, it adds up to ~1.12 (summed left and right)
# .sourceSound, .sourcePulses: Sound that is copied from and its PointProcess
# .targetSound, .targetPulses: Sound that is written to and its PointProcess
# .maxInt: largest interval between pulses for which a pulse is still copied
570 procedure overlap_add .sourceSound .sourcePulses .targetSound .targetPulses .maxInt
571 # Maximum interval between pulses (maximum pitch period)
# The object names are used inside the Formula string below
577 .sourceName$ = replace_regex$(selected$(), " ", "_", 0)
579 .targetName$ = replace_regex$(selected$(), " ", "_", 0)
581 # Iterate over target pulses
583 .numPulses = Get number of points
# Intervals just left and right of the target pulse
587 .tTarget = Get time from index: .p
588 .pLeft = Get interval: .tTarget - 0.001
589 .pRight = Get interval: .tTarget + 0.001
# Nearest pulse to the target time, and the intervals left and right of it
592 .q = Get nearest index: .tTarget
593 .tSource = Get time from index: .q
594 .qLeft = Get interval: .tSource - 0.001
595 .qRight = Get interval: .tSource + 0.001
596 # Gaussian window parameters (FWHM Left and Right)
597 # FWHM = 2*sqrt(2*ln(2)) * c
# The FWHM on each side is the smaller of the two intervals on that side
598 .cL = min(.pLeft,.qLeft)/(2*sqrt(2*ln(2)))
599 .cR = min(.pRight,.qRight)/(2*sqrt(2*ln(2)))
# Only copy when both widths are defined and correspond to intervals <= .maxInt
600 if not( .cL = undefined or .cL > .maxInt/(2*sqrt(2*ln(2))) or .cR = undefined or .cR > .maxInt/(2*sqrt(2*ln(2))) )
# Add the source, shifted from .tSource to .tTarget, under a Gaussian window:
# width .cL before the pulse and .cR from the pulse onwards
603 Formula (part): .tTarget-.margin, .tTarget+.margin, 1, 1, "if x<.tTarget then self + '.sourceName$'((x - .tTarget) + .tSource)*exp(-1*(((x - .tTarget)/.cL)^2)/2) else self + '.sourceName$'((x - .tTarget) + .tSource)*exp(-1*(((x - .tTarget)/.cR)^2)/2) endif"
# Build a new Sound in which the voiced periods of .sourceAudio follow
# .targetPulses, apply the shimmer and scale to the stored intensity.
# .sourceAudio, .sourcePulses: source Sound and its PointProcess
# .vuvTextGrid: TextGrid with "V" and "U" labels in tier 1
# .targetPulses: PointProcess with the new pulse times
# .newShimmer: passed on to increase_shimmer
610 procedure test_overlap_add .sourceAudio .sourcePulses .vuvTextGrid .targetPulses .newShimmer
611 # Use overlap-add to add new source intervals
612 # Copy only voiced pulses
613 call set_VUV_to_zero .targetPulses .vuvTextGrid U
614 # Create a copy of the old source with voiced parts zeroed
616 .testSource = Copy: "OaAsound"
617 call set_VUV_to_zero .testSource .vuvTextGrid V
619 # Copy the voiced parts of the new source to the zeroed voiced parts of the old source
620 call overlap_add .sourceAudio .sourcePulses .testSource .targetPulses 0.02
621 call increase_shimmer .testSource .targetPulses .vuvTextGrid .newShimmer
622 .newSound = selected("Sound")
# Scale to the intensity that the main script stored in global.setIntensity
623 Scale intensity: global.setIntensity
632 # Set intervals matching a label text to Zero or remove the pulses
633 # Works on Sound and Pulses
# .sound: id of a Sound or a PointProcess
# .vuvTextGrid: id of the TextGrid, tier 1 holds the labels
# .zeroLabel$: intervals with this label are set to zero (Sound) or have
#              their points removed (PointProcess)
634 procedure set_VUV_to_zero .sound .vuvTextGrid .zeroLabel$
# Keep only the first word of the selection string, which is compared
# with "Sound" and "PointProcess" below
636 .objectType$ = selected$()
637 .objectType$ = extractWord$ (.objectType$, "")
639 .numIntervals = Get number of intervals: 1
640 # Zero out VU intervals
641 for .i to .numIntervals
643 .vuvLabel$ = Get label of interval: 1, .i
644 .start = Get starting point: 1, .i
645 .end = Get end point: 1, .i
646 if .vuvLabel$ = .zeroLabel$
648 if .objectType$ = "Sound"
649 Set part to zero: .start, .end, "at nearest zero crossing"
650 elsif .objectType$ = "PointProcess"
651 Remove points between: .start, .end
653 printline Unsupported object type for set_VUV_to_zero