4 <title>meSpeak.js: Text-to-Speech on the Web
</title>
5 <meta name=
"description" content=
"meSpeak.js (modulary enhanced speak.js) is a 100% client-side JavaScript text-to-speech library based on the speak.js project." />
6 <link href=
"http://fonts.googleapis.com/css?family=Open+Sans&subset=latin" rel=
"stylesheet" type=
"text/css" />
7 <link href=
"http://fonts.googleapis.com/css?family=Lato:300&subset=latin" rel=
"stylesheet" type=
"text/css" />
10 // This demo is licensed under the GNU GPL.
12 <script type=
"text/javascript" src=
"mespeak.js"></script>
13 <script type=
"text/javascript">
14 meSpeak
.loadConfig("mespeak_config.json");
15 meSpeak
.loadVoice("voices/en/en.json");
17 function loadVoice(id
) {
18 var fname
="voices/"+id
+".json";
19 meSpeak
.loadVoice(fname
, voiceLoaded
);
22 function voiceLoaded(success
, message
) {
24 alert("Voice loaded: "+message
+".");
27 alert("Failed to load a voice: "+message
);
33 additional functions for generating a link and parsing any url-params provided for auto-speak
36 var formFields
= ['text','amplitude','wordgap','pitch','speed'];
38 function autoSpeak() {
39 // checks url for speech params, sets and plays them, if found.
40 // also adds eventListeners to update a link with those params using current values
41 var i
,l
,n
,params
,pairs
,pair
,
44 q
=document
.location
.search
,
45 f
=document
.getElementById('speakData'),
46 s1
=document
.getElementById('variantSelect'),
47 s2
=document
.getElementById('voiceSelect');
48 if (!f
|| !s2
) return; // form and/or select not found
52 pairs
=q
.substring(1).split('&');
53 for (i
=0, l
=pairs
.length
; i
<l
; i
++) {
54 pair
=pairs
[i
].split('=');
55 if (pair
.length
==2) params
[pair
[0]]=decodeURIComponent(pair
[1]);
57 // insert params into the form or complete them from defaults in form
58 for (i
=0, l
=formFields
.length
; i
<l
; i
++) {
61 f
.elements
[n
].value
=params
[n
];
64 params
[n
]=f
.elements
[n
].value
;
68 for (i
=0, l
=s1
.options
.length
; i
<l
; i
++) {
69 if (s1
.options
[i
].value
==params
.variant
) {
78 // compile a function to speak with given params for later use
79 // play only, if param "auto" is set to "true" or "1"
80 if (params
.auto
=='true' || params
.auto
=='1') {
81 speakNow = function() {
82 meSpeak
.speak(params
.text
, {
83 amplitude
: params
.amplitude
,
84 wordgap
: params
.wordgap
,
87 variant
: params
.variant
91 // check for any voice specified by the params (other than the default)
92 if (params
.voice
&& params
.voice
!=s2
.options
[s2
.selectedIndex
].value
) {
93 // search selected voice in selector
94 for (i
=0, l
=s2
.options
.length
; i
<l
; i
++) {
95 if (s2
.options
[i
].value
==params
.voice
) {
96 // voice found: adjust the form, load voice-data and provide a callback to speak
98 meSpeak
.loadVoice('voices/'+params
.voice
+'.json', function(success
, message
) {
100 if (speakNow
) setTimeout(speakNow
, 10);
103 if (window
.console
) console
.log('Failed to load requested voice: '+message
);
106 useDefaultVoice
=false;
111 // standard voice: speak (deferred until config is loaded)
112 if (speakNow
&& useDefaultVoice
) speakNow();
114 // initial url-processing done, add eventListeners for updating the link
115 for (i
=0, l
=formFields
.length
; i
<l
; i
++) {
116 f
.elements
[formFields
[i
]].addEventListener('change', updateSpeakLink
, false);
118 s1
.addEventListener('change', updateSpeakLink
, false);
119 s2
.addEventListener('change', updateSpeakLink
, false);
120 // finally, inject a link with current values into the page
124 function updateSpeakLink() {
125 // injects a link for auto-execution using current values into the page
126 var i
,l
,n
,f
,s
,v
,url
,el
,params
=new Array();
127 // collect values from form
128 f
=document
.getElementById('speakData');
129 for (i
=0, l
=formFields
.length
; i
<l
; i
++) {
131 params
.push(n
+'='+encodeURIComponent(f
.elements
[n
].value
));
134 s
=document
.getElementById('variantSelect');
135 if (s
.selectedIndex
>=0) params
.push('variant='+s
.options
[s
.selectedIndex
].value
);
136 // get current voice, default to 'en/en' as a last resort
137 s
=document
.getElementById('voiceSelect');
138 if (s
.selectedIndex
>=0) v
=s
.options
[s
.selectedIndex
].value
;
139 if (!v
) v
=meSpeak
.getDefaultVoice() || 'en/en';
140 params
.push('voice='+encodeURIComponent(v
));
141 params
.push('auto=true');
142 // assemble the url and add it as GET-link to the page
143 url
='?'+params
.join('&');
144 url
=url
.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>').replace(/\"/g
, '"');
145 el
=document
.getElementById('linkdisplay');
146 if (el
) el
.innerHTML
='Instant Link: <a href="'+url
+'">Speak this</a>.';
149 // trigger auto-speak at DOMContentLoaded
150 if (document
.addEventListener
) document
.addEventListener( "DOMContentLoaded", autoSpeak
, false );
153 end of auto-speak glue
158 <style type=
"text/css">
162 padding: 2em 1.5em 4.5em 1.5em;
163 background-color: #e2e3e4;
168 padding: 2px 40px 60px 40px;
169 margin: 0 auto
0 auto
;
170 background-color: #fafafb;
172 font-family: 'Open Sans',sans-serif
;
178 font-family: 'Lato',sans-serif
;
186 margin-bottom: 0.5em;
194 margin-bottom: 1.5em;
203 h1 span
.pict
{ font-size: 38px; color: #ccc; margin-left: 0.5em; letter-spacing: -2px; }
204 h3
.about
{ font-size: 28px; }
208 margin-bottom: 0.5em;
213 padding: 1em 0 1em 2em;
215 font-family: monospace
;
217 background-color: #f2f3f5;
220 p
.codesample strong
{ color: #222; }
231 a
.noteLink
{ text-decoration: none
; }
232 ul
.bottomMargin li
{ margin-bottom: 0.2em; }
236 font-variant: normal
;
245 margin-bottom: 0.4em;
247 table
.opttable
{ margin-left: 2em; }
248 table
.opttable td
{ white-space: nowrap
; }
249 table
.opttable
td:first-child
{ padding-right: 1.5em; }
254 font-family: monospace
;
257 background-color: #eee;
259 p
.history_codesample span
.comment
{ color: #555; }
260 a
{ color: #006f9e; }
261 a:hover
,a:focus
{ color: #2681a7; }
262 a:active
{ color: #cd360e; }
266 <h1>meSpeak.js
<span class=
"pict">((
• ))
</span></h1>
267 <h2>Text-To-Speech on the Web
</h2>
268 <form id=
"speakData" onsubmit=
"meSpeak.speak(text.value, { amplitude: amplitude.value, wordgap: wordgap.value, pitch: pitch.value, speed: speed.value, variant: variant.options[variant.selectedIndex].value }); return false">
269 <p><strong>Text:
</strong> <input type=
"text" name=
"text" size=
80 value=
"Never gonna give, you, up." />
270 <input type=
"submit" value=
"Go!" />
271 <input type=
"button" value=
"Stop" onclick=
"meSpeak.stop(); return true;" /></p>
272 <p><strong>Options:
</strong>
273 Amplitude:
<input type=
"text" name=
"amplitude" size=
5 value=
"100" />
274 Pitch:
<input type=
"text" name=
"pitch" size=
5 value=
"50" />
275 Speed:
<input type=
"text" name=
"speed" size=
5 value=
"175" />
276 Word gap:
<input type=
"text" name=
"wordgap" size=
5 value=
"0" />
277 Variant:
<select name=
"variant" id=
"variantSelect">
278 <option value=
"" selected
>None
</option>
279 <option value=
"f1">f1 (female
1)
</option>
280 <option value=
"f2">f2 (female
2)
</option>
281 <option value=
"f3">f3 (female
3)
</option>
282 <option value=
"f4">f4 (female
4)
</option>
283 <option value=
"f5">f5 (female
5)
</option>
284 <option value=
"m1">m1 (male
1)
</option>
285 <option value=
"m2">m2 (male
2)
</option>
286 <option value=
"m3">m3 (male
3)
</option>
287 <option value=
"m4">m4 (male
4)
</option>
288 <option value=
"m5">m5 (male
5)
</option>
289 <option value=
"m6">m6 (male
6)
</option>
290 <option value=
"m7">m7 (male
7)
</option>
291 <option value=
"croak">croak
</option>
292 <option value=
"klatt">klatt
</option>
293 <option value=
"klatt2">klatt2
</option>
294 <option value=
"klatt3">klatt3
</option>
295 <option value=
"whisper">whisper
</option>
296 <option value=
"whisperf">whisperf (female)
</option>
299 <form onsubmit=
"return false">
300 <p><strong>Voice:
</strong> <select id=
"voiceSelect" onchange=
"loadVoice(this.options[this.selectedIndex].value);">
301 <option value=
"ca">ca - Catalan
</option>
302 <option value=
"cs">cs - Czech
</option>
303 <option value=
"de">de - German
</option>
304 <option value=
"el">el - Greek
</option>
305 <option value=
"en/en" selected=
"selected">en - English
</option>
306 <option value=
"en/en-n">en-n - English, regional
</option>
307 <option value=
"en/en-rp">en-rp - English, regional
</option>
308 <option value=
"en/en-sc">en-sc - English, Scottish
</option>
309 <option value=
"en/en-us">en-us - English, US
</option>
310 <option value=
"en/en-wm">en-wm - English, regional
</option>
311 <option value=
"eo">eo - Esperanto
</option>
312 <option value=
"es">es - Spanish
</option>
313 <option value=
"es-la">es-la - Spanish, Latin America
</option>
314 <option value=
"fi">fi - Finnish
</option>
315 <option value=
"fr">fr - French
</option>
316 <option value=
"hu">hu - Hungarian
</option>
317 <option value=
"it">it - Italian
</option>
318 <option value=
"kn">kn - Kannada
</option>
319 <option value=
"la">la - Latin
</option>
320 <option value=
"lv">lv - Latvian
</option>
321 <option value=
"nl">nl - Dutch
</option>
322 <option value=
"pl">pl - Polish
</option>
323 <option value=
"pt">pt - Portuguese, Brazil
</option>
324 <option value=
"pt-pt">pt-pt - Portuguese, European
</option>
325 <option value=
"ro">ro - Romanian
</option>
326 <option value=
"sk">sk - Slovak
</option>
327 <option value=
"sv">sv - Swedish
</option>
328 <option value=
"tr">tr - Turkish
</option>
329 <option value=
"zh">zh - Mandarin Chinese (Pinyin)
</option>
330 <option value=
"zh-yue">zh-yue - Cantonese Chinese
</option>
333 <p id=
"linkdisplay"></p>
334 <hr class=
"separator" />
335 <p><em>First things first: Where can I download this?
— See the
<a href=
"#download">download-link
</a> below.
</em></p>
336 <h3 class=
"about">About
</h3>
338 <strong>meSpeak.js
</strong> (modulary enhanced speak.js) is a
100% client-side JavaScript text-to-speech library based on the
<a href=
"http://syntensity.com/static/espeak.html" target=
"_blank">speak.js
</a> project (see below).
<br />
339 meSpeak.js adds support for Webkit and Safari and introduces loadable voice modules. Also there is no more need for an embedding HTML-element.
<br />
340 Separating the code of the library from config-data and voice definitions should help future optimizations of the core part of
<a href=
"http://syntensity.com/static/espeak.html" target=
"_blank">speak.js
</a>.
<br />
341 All separated data has been compressed to base64-encoded strings from the original binary files to save some bandwidth (compared to JS-arrays of raw
8-bit data).
<br />
342 Browser requirements: Firefox, Chrome/Opera, Webkit, and Safari (MSIE11 is expected to be compliant).
<br /><br />
343 meSpeak.js
2011-
2015 by Norbert Landsteiner, mass:werk
– media environments;
<a href=
"http://www.masswerk.at/mespeak/">http://www.masswerk.at/mespeak/
</a><br /><br />
344 <strong>Important Changes:
</strong><br /><br />
345 <strong>v
.1.1</strong> adds support for the
<em>Web Audio API
</em> (AudioContext), which is now the preferred option for playback with the HTMLAudioElement as a fall-back.
<br />Thanks to the new method of playback meSpeak.js was tested successfully with iOS/Safari (iOS
6).
<br />
346 Also starting with v
.1.1 there is now an option to export the raw speech-data rather than playing the sound (see: options,
"rawdata
").
<br /><br />
347 <strong>v
.1.2</strong> adds volume control and the capacity to play back cached streams generated using the
"rawdata
" option.
<br /><br />
348 <strong>v
.1.5</strong> adds an optional callback-argument to the methods
<tt>meSpeak.speak()
</tt> and
<tt>meSpeak.play()
</tt>.
<br /><br />
349 <strong>v
1.6</strong> adds support for voice-variants (like
<em>female voices
</em>) and includes the required definitions files.
<br /><br />
350 <strong>v
1.7</strong> finally supports the complete set of usable espeak-options.
<br /><br />
351 <strong>v
1.9</strong> adds
<tt>meSpeak.stop()
</tt> in order to stop all sounds or one or more specific sound(s).
<br /><br />
352 <strong>v
1.9.3</strong> adds support for the complete Basic Latin and Latin-
1 Supplement Unicode range (U+
0000 .. U+
00FF).
<br /><br />
353 <strong>v
1.9.4</strong> meSpeak now recovers gracefully and transparently from any internal FS errors (which might show up at the
80th call to
<tt>meSpeak.speak()
</tt>).
<br /><br />
354 <strong>v
1.9.5</strong> Added
<tt><a href=
"multipartExample.html">meSpeak.speakMultipart()
</a></tt> in order to combine multiple voices.
<br /><br />
355 <strong>v
1.9.7.1</strong> <em>Important Update
</em> — Fixes for changes in Web Audio API in Apple Safari
9.x (iOS and Mac OS X).
357 <p>Some
<strong>real world examples
</strong> (at masswerk.at):
<br />
358 • Explore client-side speech I/O with
<a href=
"http://www.masswerk.at/eliza/" target=
"_blank">E.L.I.Z.A. Talking
</a><br />
359 • Celebrating meSpeak.js v
.1.5:
<a href=
"http://www.masswerk.at/mespeak/rap/" target=
"_blank">JavaScript Doing The JavaScript Rap (featuring MC meSpeak)
</a> <small>(a heavy performance test)
</small>
363 <p class=
"codesample"><strong>meSpeak.loadConfig(
"mespeak_config.json
");
</strong>
364 <strong>meSpeak.loadVoice('en-us.json');
</strong>
365 meSpeak.speak('hello world');
366 meSpeak.speak('hello world', { option1: value1, option2: value2 .. });
367 meSpeak.speak('hello world', { option1: value1, option2: value2 .. }, myCallback);
369 var id = meSpeak.speak('hello world');
372 <strong>meSpeak.speak( text [, { option1: value1, option2: value2 .. } [, callback ]] );
</strong>
374 <strong>text
</strong>: The string of text to be spoken.
375 The text may contain line-breaks (
"\n
") and special characters.
376 Default text-encoding is UTF-
8 (see the option
"utf16
" for other).
378 <em><strong>options
</strong> (eSpeak command-options):
</em>
379 *
<strong>amplitude
</strong>: How loud the voice will be (default:
100)
380 *
<strong>pitch
</strong>: The voice pitch (default:
50)
381 *
<strong>speed
</strong>: The speed at which to talk (words per minute) (default:
175)
382 *
<strong>voice
</strong>: Which voice to use (default: last voice loaded or defaultVoice, see below)
383 *
<strong>wordgap
</strong>: Additional gap between words in
10 ms units (default:
0)
384 *
<strong>variant
</strong>: One of the variants to be found in the eSpeak-directory
"~/espeak-data/voices/!v
"
385 Variants add some effects to the normally plain voice, e.g. notably a female tone.
387 "f1
",
"f2
",
"f3
",
"f4
",
"f5
" for female voices
388 "m1
",
"m2
",
"m3
",
"m4
",
"m5
",
"m6
",
"m7
" for male voices
389 "croak
",
"klatt
",
"klatt2
",
"klatt3
",
"whisper
",
"whisperf
" for other effects.
390 (Using eSpeak, these would be appended to the
"-v
" option by
"+
" and the value.)
391 Note: Try
"f2
" or
"f5
" for a female voice.
392 *
<strong>linebreak
</strong>: (Number) Line-break length, default value:
0.
393 *
<strong>capitals
</strong>: (Number) Indicate words which begin with capital letters.
394 1: Use a click sound to indicate when a word starts with a capital letter,
395 or double click if word is all capitals.
396 2: Speak the word
"capital
" before a word which begins with a capital letter.
397 Other values: Increases the pitch for words which begin with a capital letter.
398 The greater the value, the greater the increase in pitch. (eg.:
20)
399 *
<strong>punct
</strong>: (Boolean or String) Speaks the names of punctuation characters when they are encountered
400 in the text. If a string of characters is supplied, then only those listed punctuation
401 characters are spoken, eg. {
"punct
":
".,;?
" }.
402 *
<strong>nostop
</strong>: (Boolean) Removes the end-of-sentence pause which normally occurs at the end of the text.
403 *
<strong>utf16
</strong>: (Boolean) Indicates that the input is UTF-
16, default: UTF-
8.
404 *
<strong>ssml
</strong>: (Boolean) Indicates that the text contains SSML (Speech Synthesis Markup Language)
405 tags or other XML tags. (A small set of HTML is supported too.)
407 <em>further options (meSpeak.js specific):
</em>
408 *
<strong>volume
</strong>: Volume relative to the global volume (number,
0.
.1, default:
1)
409 Note: the relative volume has no effect on the export using option 'rawdata'.
410 *
<strong>rawdata
</strong>: Do not play, return data only.
411 The type of the returned data is derived from the value (case-insensitive) of 'rawdata':
412 - '
<strong>base64
</strong>': returns a base64-encoded string.
413 - '
<strong>mime
</strong>': returns a base64-encoded data-url (including the MIME-header).
414 (synonyms: 'data-url', 'data-uri', 'dataurl', 'datauri')
415 - '
<strong>array
</strong>': returns a plain Array object with uint
8 bit data.
416 -
<strong>default
</strong> (any other value): returns the generated wav-file as an ArrayBuffer (
8-bit unsigned).
417 Note: The value of 'rawdata' must evaluate to boolean 'true' in order to be recognized.
418 *
<strong>log
</strong>: (Boolean) Logs the compiled eSpeak-command to the JS-console.
420 <strong>callback
</strong>: An optional callback function to be called after the sound output ended.
421 The callback will be called with a single boolean argument indicating success.
422 If the resulting sound is stopped by
<em>meSpeak.stop()
</em>, the success-flag will be set to false.
424 <strong>Returns
</strong>:
425 * if called with option
<strong>rawdata
</strong>: a stream in the requested format
426 (or
<em>null
</em>, if the required resources have not loaded yet).
428 * default: a
32bit integer ID greater than
0 (or
0 on failure).
429 The ID may be used to stop this sound by calling
<strong>meSpeak.stop(
</strong><id
><strong>)
</strong>.
432 if (
<strong>meSpeak.isVoiceLoaded('de')
</strong>)
<strong>meSpeak.setDefaultVoice('de');
</strong>
433 // note: the default voice is always the last voice loaded
435 <strong>meSpeak.loadVoice('fr.json', userCallback);
</strong>
436 // userCallback is an optional callback-handler. The callback will receive two arguments:
437 // * a boolean flag for success
438 // * either the id of the voice, or a reason for errors ('network error', 'data error', 'file error')
439 alert(
<strong>meSpeak.getDefaultVoice()
</strong>); // 'fr'
441 if (
<strong>meSpeak.isConfigLoaded()
</strong>) meSpeak.speak('Configuration data has been loaded.');
442 // note: any calls to speak() will be deferred, if no valid config-data has been loaded yet.
444 <strong>meSpeak.setVolume(
0.5);
</strong>
446 <strong>meSpeak.setVolume( volume [, id-list] );
</strong>
447 Sets a volume level (
0 <= v <=
1)
448 * if called with a single argument, the method sets the global playback-volume, any sounds currently
449 playing will be updated immediately with respect to their relative volume (if specified).
450 * if called with more than a single argument, the method will set and adjust the relative volume of
451 the sound(s) with corresponding ID(s).
452 Returns: the volume provided.
454 alert(
<strong>meSpeak.getVolume()
</strong>); //
0.5
456 <strong>meSpeak.getVolume( [id] );
</strong>
457 Returns a volume level (
0 <= v <=
1)
458 * if called without an argument, the method returns the global playback-volume.
459 * if called with an argument, the method will return the relative volume of the sound with the ID
460 corresponding to the first argument.
461 if no sound with a corresponding ID is found, the method will return 'undefined'.
463 var browserCanPlayWavFiles =
<strong>meSpeak.canPlay();
</strong> // test for compatibility
465 //
<strong>export speech-data
</strong> as a stream (no playback):
466 var myUint8Array = meSpeak.speak('hello world', { 'rawdata': true }); // typed array
467 var base64String = meSpeak.speak('hello world', { 'rawdata': 'base64' });
468 var myDataUrl = meSpeak.speak('hello world', { 'rawdata': 'data-url' });
469 var myArray = meSpeak.speak('hello world', { 'rawdata': 'array' }); // simple array
471 // playing cached streams (any of the export formats):
472 <strong>meSpeak.play( stream [, relativeVolume [, callback]] );
</strong>
473 var stream1 = meSpeak.speak('hello world', { 'rawdata': true });
474 var stream2 = meSpeak.speak('hello again', { 'rawdata': true });
475 var stream3 = meSpeak.speak('hello yet again', { 'rawdata': 'data-url' });
476 meSpeak.play(stream1); // using global volume
477 meSpeak.play(stream2,
0.75); //
75% of global volume
478 meSpeak.play(stream3); // v
.1.4.2: play data-urls or base64-encoded
480 var id = meSpeak.play(stream1);
484 <strong>stream
</strong>: A stream in any of the formats returned by
<em>meSpeak.speak()
</em> with the
"rawdata
"-option.
485 <strong>volume
</strong>: (optional) Volume relative to the global volume (number,
0.
.1, default:
1)
486 <strong>callback
</strong>: (optional) A callback function to be called after the sound output ended.
487 The callback will be called with a single boolean argument indicating success.
488 If the sound is stopped by
<em>meSpeak.stop()
</em>, the success-flag will be set to false.
489 (See also: meSpeak.speak().)
491 <strong>Returns
</strong>: A
32bit integer ID greater than
0 (or
0 on failure).
492 The ID may be used to stop this sound by calling
<strong>meSpeak.stop(
</strong><id
><strong>)
</strong>.
495 <strong>meSpeak.stop( [
<id-list
>] );
</strong>
496 Stops the sound(s) specified by the
<em>id-list
</em>.
497 If called without an argument, all sounds currently playing, processed, or queued are stopped.
498 Any callback(s) associated with the sound(s) will receive
<strong>false
</strong> as the success-flag.
501 <strong>id-list
</strong>: Any number of IDs returned by a call to
<em>meSpeak.speak()
</em> or
<em>meSpeak.play()
</em>.
503 <strong>Returns
</strong>:
504 The number (integer) of sounds actually stopped.
508 <p><strong>Note on export formats
</strong>, ArrayBuffer (typed array, default) vs. simple array:
<br />The ArrayBuffer (
8-bit unsigned) provides a stream ready to be played by the Web Audio API (as a value for a BufferSourceNode), while the plain array (JavaScript Array object) may be best for export (e.g. sending the data to Flash via Flash's ExternalInterface). The default raw format (ArrayBuffer) is the preferred format for caching streams to be played later by meSpeak by calling
<tt>meSpeak.play()
</tt>, since it provides the least overhead in processing.
</p>
510 <h3>meSpeak.speakMultipart()
— concatenating multiple voices
</h3>
511 <p>Using
<tt>meSpeak.speakMultipart()
</tt> you may mix multiple parts into a single utterance.
</p>
512 <p>See the
<a href=
"multipartExample.html">Multipart-Example
</a> for a demo.
</p>
513 <p>The general form of
<tt>meSpeak.speakMultipart()
</tt> is analogous to
<tt>meSpeak.speak()
</tt>, but with an array of objects (the parts to be spoken) as the first argument (rather than a single text):
</p>
515 <p class=
"codesample">
516 <strong>meSpeak.speakMultipart(
</strong> <parts-array
> [,
<options-object
> [,
<callback-function
> ]]
<strong>)
</strong>;
518 <strong>meSpeak.speakMultipart(
</strong>
520 { text:
"text-
1" [,
<other options
> ] },
521 { text:
"text-
2" [,
<other options
> ] },
523 { text:
"text-n
" [,
<other options
> ] },
525 { option1: value1, option2: value2 .. },
529 <p>Only the first argument is mandatory, any further arguments are optional.
<br />
530 The
<em>parts-array
</em> must contain a single element (of type object) at least.
<br />
531 For any other options refer to
<tt>meSpeak.speak()
</tt>. Any options supplied as the second argument will be used as defaults for the individual parts. (Same options provided with the individual parts will override these defaults.)
<br />
532 The method returns
— like
<tt>meSpeak.speak()
</tt> — either an ID, or, if called with the
<tt>"rawdata
"</tt> option (in the general options / second argument), a stream-buffer representing the generated wav-file.
</p>
534 <h3>Note on iOS Limitations
</h3>
535 <p>iOS (currently supported only using Safari) provides a single audio-slot, playing only one sound at a time.
<br />
536 Thus, any concurrent calls to
<tt>meSpeak.speak()
</tt> or
<tt>meSpeak.play()
</tt> will stop any other sound playing.
<br />
537 Further, iOS reserves volume control to the user exclusively. Any attempt to change the volume by a script will remain without effect.
<br />
538 Please note that you still need a user-interaction at the very beginning of the chain of events in order to have a sound played by iOS.
</p>
540 <h3>Note on Options
</h3>
541 <p>The first set of options listed above corresponds directly to options of the
<strong>espeak
</strong> command. For details see the
<a href=
"http://espeak.sourceforge.net/commands.html" target=
"_blank">eSpeak command documentation
</a>.
<br />The meSpeak.js-options and their espeak-counterparts are (
<tt>meSpeak.speak()
</tt> accepts both sets, but prefers the long form):
</p>
542 <table border=
"0" class=
"opttable">
544 <tr><td><strong>meSpeak.js
</strong></td><td><strong>eSpeak
</strong></td></tr>
545 <tr><td>amplitude
</td><td>-a
</td></tr>
546 <tr><td>wordgap
</td><td>-g
</td></tr>
547 <tr><td>pitch
</td><td>-p
</td></tr>
548 <tr><td>speed
</td><td>-s
</td></tr>
549 <tr><td>voice
</td><td>-v
</td></tr>
550 <tr><td>variant
</td><td>-v
<voice
>+
<variant
></td></tr>
551 <tr><td>utf16
</td><td>-b
4 (default: -b
1)
</td></tr>
552 <tr><td>linebreak
</td><td>-l
</td></tr>
553 <tr><td>capitals
</td><td>-k
</td></tr>
554 <tr><td>nostop
</td><td>-z
</td></tr>
555 <tr><td>ssml
</td><td>-m
</td></tr>
556 <tr><td>punct
</td><td>--punct[=
"<characters
>"]
</td></tr>
560 <h3>Voices Currently Available
</h3>
562 <li><strong>ca
</strong> (Catalan)
</li>
563 <li><strong>cs
</strong> (Czech)
</li>
564 <li><strong>de
</strong> (German)
</li>
565 <li><strong>el
</strong> (Greek)
</li>
566 <li><strong>en/en
</strong> (English)
</li>
567 <li><strong>en/en-n
</strong> (English, regional)
</li>
568 <li><strong>en/en-rp
</strong> (English, regional)
</li>
569 <li><strong>en/en-sc
</strong> (English, Scottish)
</li>
570 <li><strong>en/en-us
</strong> (English, US)
</li>
571 <li><strong>en/en-wm
</strong> (English, regional)
</li>
572 <li><strong>eo
</strong> (Esperanto)
</li>
573 <li><strong>es
</strong> (Spanish)
</li>
574 <li><strong>es-la
</strong> (Spanish, Latin America)
</li>
575 <li><strong>fi
</strong> (Finnish)
</li>
576 <li><strong>fr
</strong> (French)
</li>
577 <li><strong>hu
</strong> (Hungarian)
</li>
578 <li><strong>it
</strong> (Italian)
</li>
579 <li><strong>kn
</strong> (Kannada)
</li>
580 <li><strong>la
</strong> (Latin)
</li>
581 <li><strong>lv
</strong> (Latvian)
</li>
582 <li><strong>nl
</strong> (Dutch)
</li>
583 <li><strong>pl
</strong> (Polish)
</li>
584 <li><strong>pt
</strong> (Portuguese, Brazil)
</li>
585 <li><strong>pt-pt
</strong> (Portuguese, European)
</li>
586 <li><strong>ro
</strong> (Romanian)
</li>
587 <li><strong>sk
</strong> (Slovak)
</li>
588 <li><strong>sv
</strong> (Swedish)
</li>
589 <li><strong>tr
</strong> (Turkish)
</li>
590 <li><strong>zh
</strong> (Mandarin Chinese, Pinyin)
<a href=
"#note_zh" class=
"noteLink">*
</a></li>
591 <li><strong>zh-yue
</strong> (Cantonese Chinese, Provisional)
<a href=
"#note_zh-yue" class=
"noteLink">**
</a></li>
594 <h3>JSON File Formats
</h3>
595 <p>1) Config-data:
"mespeak_config.json
":
<br />The config-file includes all data to configure the tone (e.g.: male or female) of the electronic voice.
</p>
596 <p class=
"codesample">{
597 "config
":
"<base64-encoded octet stream
>",
598 "phontab
":
"<base64-encoded octet stream
>",
599 "phonindex
":
"<base64-encoded octet stream
>",
600 "phondata
":
"<base64-encoded octet stream
>",
601 "intonations
":
"<base64-encoded octet stream
>"
603 <p>Finally the JSON object may include an optional voice-object (see below), that will be set up together with the config-data:
</p>
604 <p class=
"codesample">{
606 "voice
": {
<voice-data
> }
609 <p>2) Voice-data:
"voice.json
":
<br />A voice-file includes the ids of the voice and the dictionary used by this voice, and the binary data of these two files.
</p>
610 <p class=
"codesample">{
611 "voice_id
":
"<voice-identifier
>",
612 "dict_id
":
"<dict-identifier
>",
613 "dict
":
"<base64-encoded octet stream
>",
614 "voice
":
"<base64-encoded octet stream
>"
616 <p>Alternatively the value of
<tt>"voice
"</tt> may be a text-string, if an additional property
<tt>"voice_encoding
":
"text
"</tt> is provided.
<br />This should allow for quick changes and testing:
</p>
617 <p class=
"codesample">{
618 "voice_id
":
"<voice-identifier
>",
619 "dict_id
":
"<dict-identifier
>",
620 "dict
":
"<base64-encoded octet stream
>",
621 "voice
":
"<text-string
>",
622 "voice_encoding
":
"text
"
625 <p>Both config-data and voice-data may be loaded and switched on the fly to (re-)configure meSpeak.js.
</p>
626 <p>For a guide to customizing languages and voices, see
<em><a href=
"voices-and-languages.html">meSpeak
– Voices
& Languages
</a></em>.
628 <h3>Extended Voice Format, Mbrola Voices
</h3>
629 <p>In order to support
<a href=
"http://espeak.sourceforge.net/mbrola.html" target=
"_blank">Mbrola voices
</a> and other voices requiring a more flexible layout and/or additional data, there is also an
<em>extended voice format
</em>:
</p>
630 <p class=
"codesample">{
631 "voice_id
":
"<voice-identifier
>",
632 "voice
":
"<base64-encoded octet stream
>"
635 "path
":
"<rel-pathname
>",
636 "data
":
"<base64-encoded octet stream
>"
639 "path
":
"<rel-pathname
>",
640 "data
":
"<text-string
>",
641 "encoding
":
"text
"
646 <p>or (using a text-encoded voice-definition):
</p>
647 <p class=
"codesample">{
648 "voice_id
":
"<voice-identifier
>",
649 "voice
":
"<text-string
>",
650 "voice_encoding
":
"text
"
653 "path
":
"<rel-pathname
>",
654 "data
":
"<base64-encoded octet stream
>"
657 "path
":
"<rel-pathname
>",
658 "data
":
"<text-string
>",
659 "encoding
":
"text
"
664 <p>Only a valid voice-definition is required and optionally an array
<tt>"files
"</tt> which may be empty or contain any number of objects, containing a property
<tt>"path
"</tt> (relative file-path from the espeak-data-directory) and a property
<tt>"data
"</tt>, containing the file (either as base64-encoded data or as plain text, if there is also an optional property
<tt>"encoding
":
"text
"</tt>).
</p>
665 <p>In order to facilitate the use of Mbrola voices, for any
<tt>"voice_id
"</tt> beginning with
<tt>"mb/mb-
"</tt> only the part following the initial
<tt>"mb/
"</tt> will be used as the internal identifier for the
<tt>meSpeak.speak()
</tt> method. (So any given
<em>voice_id
</em> <tt>"mb/mb-en1
"</tt> will be translated to a
<em>voice
</em> <tt>"mb-en1
"</tt> automatically. This applies to the speak-command only.)
</p>
666 <p><em>Please don't ask for support on Mbrola voices (I don't have the faintest idea). Please refer to the
<a href=
"http://espeak.sourceforge.net/mbrola.html" target=
"_blank">Mbrola section of the eSpeak documentation
</a> for a guide to setting up the required files locally. It should be possible to load these into meSpeak.js using the
"extended voice format
", since you may put any additional payload into the files-array. Please mind that you will still require a text-to-phoneme translator as stated in the
<a href=
"http://espeak.sourceforge.net/mbrola.html" target=
"_blank">eSpeak documentation
</a> (this is out of the scope of meSpeak.js).
</em></p>
668 <h3>Deferred Calls
</h3>
669 <p>In case that speak() is called before configuration and/or voice data has been loaded, the call will be deferred and executed after set up.
<br />See this
<a href=
"deferred-call-demo.html">page
</a> for an example. You may reset the queue manually by calling
</p>
670 <p class=
"codesample">meSpeak.resetQueue();
</p>
672 <h3>Amplitude and Volume
</h3>
673 <p>There are now two separate parameters or options to control the volume of the spoken text: amplitude and volume.
<br />While
<em>amplitude
</em> affects the generation of the sound stream by the TTS-algorithm,
<em>volume
</em> controls the playback volume of the browser. By the use of
<em>volume
</em> you can cache a generated stream and still provide an individual volume level at playback time. Please note that there is a global volume (controlled by
<tt>setVolume()
</tt>) and an individual volume level relative to the global one. Both default to
1 (max volume).
</p>
675 <h3>Notes on Chinese Languages and Voices
</h3>
676 <p>Please note that the Chinese voices only support Pinyin input (phonetic transcript like
"<tt>zhong1guo2
</tt>" for
中 +
国, China) for
"zh
" and simple one-to-one translation from single Simplified Chinese characters or Jyutping romanised text for
"zh-yue
".
</p>
677 <p>The
<em>eSpeak
</em> documentation provides the following notes:
</p>
678 <blockquote id=
"note_zh" class=
"note">*)
<strong>zh (Mandarin Chinese)
</strong>:
<br />This speaks Pinyin text and Chinese characters. There is only a simple one-to-one translation of Chinese characters to a single Pinyin pronunciation. There is no attempt yet at recognising different pronunciations of Chinese characters in context, or of recognising sequences of characters as
"words". The eSpeak installation includes a basic set of Chinese characters. More are available in an additional data file for Mandarin Chinese at: http://espeak.sourceforge.net/data/.
</blockquote>
679 <blockquote id=
"note_zh-yue" class=
"note">**)
<strong>zh-yue (Cantonese Chinese, Provisional)
</strong>:
<br />Just a naive simple one-to-one translation from single Simplified Chinese characters to phonetic equivalents in Cantonese. There is limited attempt at disambiguation, grouping characters into words, or adjusting tones according to their surrounding syllables. This voice needs Chinese character to phonetic translation data, which is available as a separate download for Cantonese at: http://espeak.sourceforge.net/data/.
<br />The voice can also read Jyutping romanised text.
</blockquote>
680 <p>For a simple zh-to-Pinyin translation in JavaScript see:
<a href=
"http://www.masswerk.at/mespeak/zh-pinyin-translator.zip">http://www.masswerk.at/mespeak/zh-pinyin-translator.zip
</a></p>
682 <h3>Flash-Fallback for Wave Files
</h3>
683 <p>(m)eSpeak internally produces wav-files, which are then played. Internet Explorer
10 supports typed arrays (which are required for the binary logic), but does not provide native playback of wav-files. To provide compatibility for this browser, you could try the experimental
<a href=
"msie_flashFallback/index.html">meSpeak Flash Fallback
</a>.
</p>
685 <a name=
"download" id=
"download"></a>
687 <p><strong>Download
</strong> (all code under GPL):
<a href=
"http://www.masswerk.at/mespeak/mespeak.zip">mespeak.zip
</a><br />
688 (v
.1.9.7.1, last update:
2015-
10-
07 13:
00 GMT)
</p>
689 <h3>Version History
</h3>
691 <dt>v
.1.9.7.1</dt><dd>Fix for Web Audio API changes in Apple Safari
9.x (Mac OS X and iOS, compare v
.1.9.2).
</dd>
692 <dt>v
.1.9.6</dt><dd>Minor internal changes.
</dd>
693 <dt>v
.1.9.5</dt><dd>Added
<tt>meSpeak.speakMultipart()
</tt>.
<br />Also,
<tt>meSpeak.speak()
</tt> and
<tt>meSpeak.speakMultipart()
</tt> won't fail on a missing voice any more: As soon as there is a default-voice loaded and set, the default-voice will be used instead.
</dd>
694 <dt>v
.1.9.4.1</dt><dd>Fixed a bug in the error handling on missing voices.
</dd>
695 <dt>v
.1.9.4</dt><dd>Finally found a work-around for the Emscripten FS breaking on the
80th call to
<tt>run()
</tt> (internally called by
<tt>meSpeak.speak()
</tt>): We now reboot gracefully, preserving any loaded files; no external effects or differences in behavior are caused by this. In order to accomplish this, the eSpeak-core is now run as an instance of a constructor.
</dd>
696 <dt>v
.1.9.3</dt><dd>Added support for the Unicode Basic Latin and Latin-
1 Supplement character range (U+
0000 . U+
00FF).
<br />(Emscripten originally supports only the C-locale,
7-bit ASCII.)
</dd>
697 <dt>v
.1.9.2</dt><dd>Fix for Chrome
32: Worked around a behavioral change (bug?) in Chrome
32.
<br />It might be worth noting that it is no longer possible to play back sound with the Web Audio API by the same code with Webkit iOS and Chrome while using the
<tt>decodeAudioData
</tt>-method. (Welcome back to user-agent sniffing. Really Google?)
<br /><br />
698 Since this might be of general interest, here is a short tutorial:
699 <p class=
"history_codesample">
700 <span class=
"comment">/* Cross-Browser Web Audio API Playback With Chrome And Callbacks */
</span>
702 <span class=
"comment">// alias the Web Audio API AudioContext-object
</span>
703 var aliasedAudioContext = window.AudioContext || window.webkitAudioContext;
704 <span class=
"comment">// ugly user-agent-string sniffing
</span>
705 var isChrome = ((typeof navigator !== 'undefined')
&& navigator.userAgent
&&
706 navigator.userAgent.indexOf('Chrome') !== -
1);
707 var chromeVersion = (isChrome)?
709 navigator.userAgent.replace(/^.*?\bChrome\/([
0-
9]+).*$/, '$
1'),
713 function playSound(streamBuffer, callback) {
714 <span class=
"comment">// set up a BufferSource-node
</span>
715 var audioContext = new aliasedAudioContext();
716 var source = audioContext.createBufferSource();
717 source.connect(audioContext.destination);
718 <span class=
"comment">// since the ended-event isn't generally implemented,
719 // we need to use the decodeAudioData()-method in order
720 // to extract the duration to be used as a timeout-delay
</span>
721 audioContext.decodeAudioData(streamBuffer, function(audioData) {
722 <span class=
"comment">// detect any implementation of the ended-event
723 // Chrome added support for the ended-event lately,
724 // but it's unreliable (doesn't fire every time)
725 // so let's exclude it.
</span>
726 if (!isChrome
&& source.onended !== undefined) {
727 <span class=
"comment">// we could also use
"source.addEventListener('ended', callback, false)
" here
</span>
728 source.onended = callback;
731 var duration = audioData.duration;
732 <span class=
"comment">// convert to msecs
733 // use a default of
1 sec, if we lack a valid duration
</span>
734 var delay = (duration)? Math.ceil(duration *
1000) :
1000;
735 setTimeout(callback, delay);
737 <span class=
"comment">// finally assign the buffer
</span>
738 source.buffer = audioData;
739 <span class=
"comment"> // start playback for Chrome
>=
32
740 // please note that this would be without effect on iOS, since we're
741 // inside an async callback and iOS requires direct user interaction
</span>
742 if (chromeVersion
>=
32) source.start(
0);
744 function(error) {
<span class=
"comment">/* decoding-error-callback */
</span> });
745 <span class=
"comment">// normal start of playback, this would be essentially autoplay
746 // but is without any effect in Chrome
32
747 // let's exclude Chrome
32 and higher to avoid any double calls anyway
</span>
748 if (!isChrome || chromeVersion
< 32) {
758 <dt>v
.1.9.1</dt><dd>Added support for IDs to
<tt>meSpeak.setVolume()
</tt> and
<tt>meSpeak.getVolume()
</tt> in order to optionally address relative playback volumes of individual sounds.
<br />(If IDs are supplied as optional arguments, the volume will be the relative volume of the sound(s) with corresponding ID(s), else the global playback volume.)
</dd>
759 <dt>v
.1.9</dt><dd>Added
<tt>meSpeak.stop()
</tt>. For this a new return value is introduced:
<br /><tt>meSpeak.speak()
</tt> and
<tt>meSpeak.play()
</tt> return now a
32bit numeric ID (quite like
<tt>setTimeout()
</tt>).
<br />IDs may be provided to
<tt>meSpeak.stop()
</tt> as argument(s) in order to stop specific sounds.
<br />If
<tt>meSpeak.stop()
</tt> is called without any arguments, all sounds currently processed, playing, or queued will be stopped.
<br /><tt>meSpeak.speak()
</tt> returns still an audio-stream in the requested format, if called with the
"<tt>rawdata
</tt>"-option.
<br />
760 In case of failing,
<tt>0</tt> is returned as an ID (or
<tt>null
</tt> with a
"<tt>rawdata
</tt>"-request), while a successful call will always return an ID greater than
<tt>0</tt>.
</dd>
761 <dt>v
.1.8.7</dt><dd>Returned to improved handling of durations reported by Web Audio streams, used to handle callbacks. This is as in
1.8.5.
</dd>
762 <dt>v
.1.8.6</dt><dd>Fixed a bug (introduced in a previous version) preventing tablet-based webkit-browsers from actually playing. (So you can't start a sound from inside the
<tt>decodeAudioData()
</tt>-callback?)
</dd>
763 <dt>v
.1.8.5</dt><dd>Disabled the Web Audio source-node's
<em>onended
</em> event-handler for Chrome to work around a bug in Chrome, where the event is not firing reliably. (We are falling back to a timeout on the stream's duration like before Chrome implemented the onended event.)
</dd>
764 <dt>v
.1.8.4</dt><dd><tt>speak()
</tt> now also accepts the eSpeak flags as option keys (e.g.
"<tt>k
</tt>" for
"<tt>capitals
</tt>" or
"<tt>v
</tt>" for
"<tt>voice
</tt>", cf. the note on options).
<br />Added documentation for the
"<tt>punct
</tt>"-option.
</dd>
765 <dt>v
.1.8.3</dt><dd><tt>speak()
</tt> now cleans up the filesystem from the internal wav-file after use and returns a unique array of the resulting sound-data (rather than just a pointer to the array produced by Emscripten's filesystem).
</dd>
766 <dt>v
.1.8.2</dt><dd>Added a bit of delay before finally unlinking any Web Audio API resources (working around a Chrome duration issue).
<tt>meSpeak.play()
</tt> now reports in the log the object type of any unsuitable input.
</dd>
767 <dt>v
.1.8.1</dt><dd>Tweaked the handling of Mbrola voices.
</dd>
768 <dt>v
.1.8</dt><dd>Added support for extended voice-formats (like Mbrola voices).
</dd>
769 <dt>v
.1.7</dt><dd>Added support for various minor eSpeak-options (now the full set of usable options is supported).
<br />Also, we indicate explicitly that the text to be spoken is UTF-
8 encoded (if not specified otherwise) rather than relying on defaults.
</dd>
770 <dt>v
.1.6</dt><dd>Added support for voice-variants.
</dd>
771 <dt>v
.1.5.1</dt><dd>Fixed deferred call option to include and execute any callbacks.
</dd>
772 <dt>v
.1.5</dt><dd>Added an optional callback to
<tt>meSpeak.speak()
</tt> and
<tt>meSpeak.play()
</tt>.
<br />Added some clean-up code to prevent any memory leaks with some implementations of the Web Audio API.
<br />Removed any references to
"<tt>window
</tt>" in favor of
"<tt>self
</tt>".
</dd>
773 <dt>v
.1.4.4</dt><dd>Cleaned up a bit of the Emscripten-generated code, changed wording in this page.
</dd>
774 <dt>v
.1.4.3</dt><dd>Better handling for base64-imports when using the HTMLAudioElement for playback with
<tt>meSpeak.play()
</tt>. (Less overhead.)
</dd>
775 <dt>v
.1.4.2</dt><dd>Added base64 or data-url as import-format for
<tt>meSpeak.play()
</tt>.
</dd>
776 <dt>v
.1.4.1</dt><dd>Added a guide to voices and languages and an experimental Flash-fallback for MSIE10. No changes to the meSpeak-code.
</dd>
777 <dt>v
.1.4</dt><dd>Added an option to export data as a plain array.
</dd>
778 <dt>v
.1.3.1</dt><dd>Fixed a bug in the decoding of text-formatted voice data.
</dd>
779 <dt>v
.1.3</dt><dd>Added alternative text format for voices.
</dd>
780 <dt>v
.1.2</dt><dd>Added volume control and capability to play back exported audio-streams.
</dd>
781 <dt>v
.1.1</dt><dd>Added support for the Web Audio API (AudioContext), which is now the preferred method to play the generated sound. Browsers lacking support for the Web Audio API will use the HTMLAudioElement for playback. (v
.1.1 was successfully tested to play on iOS
6/Safari.) Also added an option to export the raw data in various formats.
</dd>
782 <dt>v
.1.04</dt><dd>Demo-page: Auto-speak will now be triggered only, if a URL-parameter
"auto
" set to
"true
" or
"1" is provided.
<br />(This additional parameter should inhibit any repeated attempts to play in case the script would fail and the demo-form would be sent via GET-parameters.)
</dd>
783 <dt>v
.1.03</dt><dd>Added an instant link for auto-speak to this demo-page.
</dd>
784 <dt>v
.1.02</dt><dd>Added Chinese voice-data (zh, zh-yue) by popular request.
</dd>
785 <dt>v
.1.01</dt><dd>Added an onload-callback to the assignment of the generated audio-data-URL. This should add compatibility to newer versions of WebKit and Chrome.
</dd>
786 <dt>v
.1.0</dt><dd>Initial upload.
</dd>
789 <hr class=
"separator" />
791 <h3>About speak.js
</h3>
793 <strong>speak.js
</strong> is
100% clientside JavaScript.
"<a href=
"https://github.com/kripken/speak.js" target=
"_blank">speak.js
</a>" is a port of
<a href=
"http://espeak.sourceforge.net/" target=
"_blank">eSpeak
</a>, an open source speech synthesizer, which was compiled from C++ to JavaScript using
<a href=
"http://emscripten.org" target=
"_blank">Emscripten
</a>.
<br />
794 The project page and source code for this demo can be found
<a href=
"https://github.com/kripken/speak.js" target=
"_blank">here
</a>.
<br /><em>Note: There were initially plans to merge this project with speak.js, but they somehow became stuck.
</em>
798 Browser requirements:
799 <ul class=
"bottomMargin">
800 <li><strong>Typed arrays
</strong>. The eSpeak code is not portable to the extent that would be necessary to avoid using typed arrays.
801 (It should however be possible to rewrite small bits of eSpeak to fix that.)
802 Typed arrays are present in Firefox, Chrome, Webkit, and Safari, but not IE or Opera.
</li>
803 <li><strong>Update
</strong>: Contrary to the original documentation, newer versions of Opera and IE both provide support for typed arrays.
</li>
805 Note that recent versions of these browsers are needed in most cases.