4 * Text To Speech classes
6 * Portable Windows Library
8 * Copyright (c) 2002 Equivalence Pty. Ltd.
10 * The contents of this file are subject to the Mozilla Public License
11 * Version 1.0 (the "License"); you may not use this file except in
12 * compliance with the License. You may obtain a copy of the License at
13 * http://www.mozilla.org/MPL/
15 * Software distributed under the License is distributed on an "AS IS"
16 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
17 * the License for the specific language governing rights and limitations
20 * The Original Code is Portable Windows Library.
22 * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
24 * Contributor(s): ______________________________________.
27 * Revision 1.11 2004/06/19 09:02:32 csoutheren
28 * Added rate and volume to keep Festival happy
30 * Revision 1.10 2004/06/19 07:18:59 csoutheren
31 * Change TTS engine registration to use abstract factory code
33 * Revision 1.9 2004/04/09 06:52:17 rjongbloed
34 * Removed #pragma linker command for /delayload of DLL as documentation says that
37 * Revision 1.8 2004/02/23 23:52:20 csoutheren
38 * Added pragmas to avoid every Windows application needing to include libs explicitly
40 * Revision 1.7 2003/10/30 11:28:25 rjongbloed
41 * Added directory name for Speech API under Windows.
43 * Revision 1.6 2003/09/17 06:02:13 csoutheren
44 * Fixed windows header file problem caused by removing recursive headers
46 * Revision 1.5 2003/04/17 01:21:33 craigs
47 * Added import of ptbuildopts to correctly detect if P_SAPI is set
49 * Revision 1.4 2003/04/16 08:00:19 robertj
50 * Windows pseudo autoconf support
52 * Revision 1.3 2002/11/06 22:47:25 robertj
53 * Fixed header comment (copyright etc)
55 * Revision 1.2 2002/08/14 15:18:25 craigs
56 * Fixed Festival implementation
58 * Revision 1.1 2002/08/06 04:45:58 craigs
64 #pragma implementation "ptts.h"
67 #include "ptbuildopts.h"
70 ////////////////////////////////////////////////////////////
72 // WIN32 COM stuff must be first in file to compile properly
76 #if defined(P_SAPI_LIBRARY)
77 #pragma comment(lib, P_SAPI_LIBRARY)
93 ////////////////////////////////////////////////////////////
95 // this disables the winsock2 stuff in the Windows contain.h, to avoid header file problems
96 #define P_KNOCKOUT_WINSOCK2
99 #include <ptlib/pipechan.h>
100 #include <ptclib/ptts.h>
102 ////////////////////////////////////////////////////////////
104 // Text to speech using Microsoft's Speech API (SAPI)
105 // Can be downloaded from http://www.microsoft.com/speech/download/sdk51
// Capacity, in WCHAR elements, of the wide-character file name buffer
// (szwWavFileName) declared in PTextToSpeech_SAPI::OpenFile.
110 #define MAX_FN_SIZE 1024
// Text-to-speech engine built on Microsoft SAPI. Derives from PTextToSpeech
// and holds the SAPI voice (ISpVoice) plus the stream (ISpStream) that
// OpenFile binds to an output file.
112 class PTextToSpeech_SAPI
: public PTextToSpeech
114 PCLASSINFO(PTextToSpeech_SAPI
, PTextToSpeech
);
116 PTextToSpeech_SAPI();
117 ~PTextToSpeech_SAPI();
// Voice enumeration and selection.
120 PStringArray
GetVoiceList();
121 BOOL
SetVoice(const PString
& voice
);
// Rate and volume controls.
123 BOOL
SetRate(unsigned rate
);
126 BOOL
SetVolume(unsigned volume
);
127 unsigned GetVolume();
// Output selection: a file path or a PChannel.
129 BOOL
OpenFile (const PFilePath
& fn
);
130 BOOL
OpenChannel(PChannel
* channel
);
// Returns the `opened` flag as-is.
131 BOOL
IsOpen() { return opened
; }
// Speaks `str`; `hint` describes how the text should be interpreted.
134 BOOL
Speak (const PString
& str
, TextType hint
);
// Shared by every instance (static). The constructor and destructor take
// refMutex before they read or modify refCount.
139 static PMutex refMutex
;
140 static int * refCount
;
// SAPI voice object; created by OpenVoice via CoCreateInstance(CLSID_SpVoice).
143 CComPtr
<ISpVoice
> m_cpVoice
;
// Stream bound to the output file by OpenFile and released by Close.
144 CComPtr
<ISpStream
> cpWavStream
;
147 unsigned rate
, volume
;
// File-scope factory object associating PTextToSpeech_SAPI with the key
// "Microsoft SAPI".
// NOTE(review): presumably constructing this object is what registers the
// engine with the PTextToSpeech factory — confirm against PAbstractFactory.
151 static PAbstractFactory
<PTextToSpeech
, PTextToSpeech_SAPI
> sapiTTSFactory("Microsoft SAPI");
// Out-of-class definitions of the static members declared in
// PTextToSpeech_SAPI.
// refCount has no explicit initialiser; as a pointer with static storage
// duration it is zero-initialised, which is what the constructor's
// `refCount == NULL` test checks for.
153 int * PTextToSpeech_SAPI::refCount
;
154 PMutex
PTextToSpeech_SAPI::refMutex
;
// Constructor. Serialises on the class-wide refMutex, handles the case where
// the shared refCount has not been set up yet, and starts the engine in the
// "not opened, not using a file" state.
157 PTextToSpeech_SAPI::PTextToSpeech_SAPI()
// Hold the class-wide lock while the shared reference count is examined.
159 PWaitAndSignal
m(refMutex
);
// refCount is still NULL: no count has been allocated so far.
161 if (refCount
== NULL
) {
// Initialise COM with the multithreaded concurrency model.
// NOTE(review): the HRESULT returned by CoInitializeEx is discarded here.
164 ::CoInitializeEx(NULL
, COINIT_MULTITHREADED
);
// Nothing is open yet and no output file is in use.
169 usingFile
= opened
= FALSE
;
// Destructor. Under the class-wide refMutex, decrements the shared reference
// count and enters the clean-up branch when the count reaches zero.
173 PTextToSpeech_SAPI::~PTextToSpeech_SAPI()
175 PWaitAndSignal
m(refMutex
);
// NOTE(review): refCount is dereferenced unconditionally; this relies on the
// constructor having left it non-NULL — confirm.
177 if ((--(*refCount
)) == 0) {
// Creates the SAPI voice object.
// Returns TRUE when CoCreateInstance(CLSID_SpVoice) succeeds; the same value
// is stored in `opened`.
184 BOOL
PTextToSpeech_SAPI::OpenVoice()
186 PWaitAndSignal
m(mutex
);
// Instantiate the SpVoice COM class into m_cpVoice.
188 HRESULT hr
= m_cpVoice
.CoCreateInstance(CLSID_SpVoice
);
189 return (opened
= SUCCEEDED(hr
));
// Channel output entry point for the SAPI engine. The PChannel argument is
// unnamed and therefore unused; the function clears `opened` and returns
// FALSE.
192 BOOL
PTextToSpeech_SAPI::OpenChannel(PChannel
*)
194 PWaitAndSignal
m(mutex
);
198 return (opened
= FALSE
);
// Directs speech output to the file `fn`.
// Binds cpWavStream to the file using an 8 kHz, 16-bit, mono format and sets
// that stream as the voice's output.
// Returns TRUE when SetOutput succeeds; the same value is stored in `opened`.
202 BOOL
PTextToSpeech_SAPI::OpenFile(const PFilePath
& fn
)
204 PWaitAndSignal
m(mutex
);
// Describe the audio format of the file: 8 kHz, 16 bit, mono.
212 CSpStreamFormat wavFormat
;
213 wavFormat
.AssignFormat(SPSF_8kHz16BitMono
);
// Fixed-size wide-character buffer for the file name.
215 WCHAR szwWavFileName
[MAX_FN_SIZE
] = L
"";;
// Convert the path to wide characters and copy it into the buffer.
// NOTE(review): wcscpy does not limit the copy to MAX_FN_SIZE elements, so a
// long enough path would overrun szwWavFileName — confirm whether the path
// length is bounded elsewhere.
218 wcscpy(szwWavFileName
, T2W((const char *)fn
));
// Bind the stream to the file; SPFM_CREATE_ALWAYS is the file mode requested.
219 HRESULT hr
= SPBindToFile(szwWavFileName
, SPFM_CREATE_ALWAYS
, &cpWavStream
, &wavFormat
.FormatId(), wavFormat
.WaveFormatExPtr());
// Binding failed: drop whatever the stream pointer holds.
221 if (!SUCCEEDED(hr
)) {
222 cpWavStream
.Release();
// Route the voice's output to the bound stream.
226 hr
= m_cpVoice
->SetOutput(cpWavStream
, TRUE
);
228 return (opened
= SUCCEEDED(hr
));
// Closes the SAPI output: waits for the voice to finish, then releases the
// output stream.
231 BOOL
PTextToSpeech_SAPI::Close()
233 PWaitAndSignal
m(mutex
);
// Block, with no timeout (INFINITE), until the voice reports it is done.
240 m_cpVoice
->WaitUntilDone(INFINITE
);
241 cpWavStream
.Release();
// Speaks `otext` through the SAPI voice.
// The text is copied to a local so it can be adjusted according to `hint`,
// converted to a wide-character buffer, and passed to ISpVoice::Speak with
// SPF_DEFAULT. Returns TRUE when that call succeeds.
253 BOOL
PTextToSpeech_SAPI::Speak(const PString
& otext
, TextType hint
)
255 PWaitAndSignal
m(mutex
);
// Work on a copy so the caller's string is left untouched.
260 PString text
= otext
;
262 // do various things to the string, depending upon the hint
273 // quick hack to calculate length of Unicode string
// Buffer of one 16-bit unit per character of `text`, plus a terminator.
// NOTE(review): this is allocated with new[] and no matching delete[] appears
// alongside it — confirm the buffer is released on every path.
// NOTE(review): the size assumes the wide string has no more units than
// text.GetLength() — confirm for multi-byte input.
274 unsigned short * uStr
= new unsigned short[text
.GetLength()+1];
277 wcscpy(uStr
, T2W((const char *)text
));
279 HRESULT hr
= m_cpVoice
->Speak(uStr
, SPF_DEFAULT
, NULL
);
283 return SUCCEEDED(hr
);
// Builds a list of voices by enumerating the SAPI voice tokens
// (SPCAT_VOICES) and appending an entry to the result array.
286 PStringArray
PTextToSpeech_SAPI::GetVoiceList()
288 PWaitAndSignal
m(mutex
);
290 PStringArray voiceList
;
// Holds the token currently being visited, and the enumerator over all tokens.
292 CComPtr
<ISpObjectToken
> cpVoiceToken
;
293 CComPtr
<IEnumSpObjectTokens
> cpEnum
;
296 //Enumerate the available voices
297 HRESULT hr
= SpEnumTokens(SPCAT_VOICES
, NULL
, NULL
, &cpEnum
);
299 // Get the number of voices
301 hr
= cpEnum
->GetCount(&ulCount
);
303 // Obtain a list of available voice tokens, set the voice to the token, and call Speak
// Stops at the first failing HRESULT or once ulCount tokens have been fetched.
304 while (SUCCEEDED(hr
) && ulCount
--) {
// Drop the previous token before fetching the next one.
306 cpVoiceToken
.Release();
309 hr
= cpEnum
->Next(1, &cpVoiceToken
, NULL
);
// NOTE(review): the entry appended is the fixed literal "voice"; nothing read
// from cpVoiceToken is used here — confirm whether the token's name or
// description was meant to be added instead.
312 voiceList
.AppendString("voice");
// Voice selection for the SAPI engine; `v` is the requested voice. The
// instance mutex is held for the duration of the call.
319 BOOL
PTextToSpeech_SAPI::SetVoice(const PString
& v
)
321 PWaitAndSignal
m(mutex
);
326 BOOL
PTextToSpeech_SAPI::SetRate(unsigned v
)
332 unsigned PTextToSpeech_SAPI::GetRate()
337 BOOL
PTextToSpeech_SAPI::SetVolume(unsigned v
)
343 unsigned PTextToSpeech_SAPI::GetVolume()
351 ////////////////////////////////////////////////////////////
353 // Generic text to speech using Festival
// Text-to-speech engine that generates audio by running Festival's text2wave
// command (see Invoke). Derives from PTextToSpeech.
356 class PTextToSpeech_Festival
: public PTextToSpeech
358 PCLASSINFO(PTextToSpeech_Festival
, PTextToSpeech
);
360 PTextToSpeech_Festival();
361 ~PTextToSpeech_Festival();
// Voice enumeration and selection.
364 PStringArray
GetVoiceList();
365 BOOL
SetVoice(const PString
& voice
);
// Rate and volume controls.
367 BOOL
SetRate(unsigned rate
);
370 BOOL
SetVolume(unsigned volume
);
371 unsigned GetVolume();
// Output selection: a file path or a PChannel.
373 BOOL
OpenFile (const PFilePath
& fn
);
374 BOOL
OpenChannel(PChannel
* channel
);
// Returns the `opened` flag as-is.
375 BOOL
IsOpen() { return opened
; }
// Speaks `str`; `hint` describes how the text should be interpreted.
378 BOOL
Speak (const PString
& str
, TextType hint
);
// Runs the external command that renders `str` into the file `fn`.
381 BOOL
Invoke(const PString
& str
, const PFilePath
& fn
);
// `rate` is passed to text2wave as its -F argument by Invoke.
388 unsigned volume
, rate
;
// File-scope factory object associating PTextToSpeech_Festival with the key
// "Festival".
// NOTE(review): presumably constructing this object is what registers the
// engine with the PTextToSpeech factory — confirm against PAbstractFactory.
392 static PAbstractFactory
<PTextToSpeech
, PTextToSpeech_Festival
> festivalTTSFactory("Festival");
// Constructor. Under the instance mutex, starts the engine in the
// "not opened, not using a file" state.
394 PTextToSpeech_Festival::PTextToSpeech_Festival()
396 PWaitAndSignal
m(mutex
);
397 usingFile
= opened
= FALSE
;
// Destructor. Acquires the instance mutex for the duration of destruction.
403 PTextToSpeech_Festival::~PTextToSpeech_Festival()
405 PWaitAndSignal
m(mutex
);
// Channel output entry point for the Festival engine. The PChannel argument
// is unnamed and therefore unused; the instance mutex is held for the call.
408 BOOL
PTextToSpeech_Festival::OpenChannel(PChannel
*)
410 PWaitAndSignal
m(mutex
);
// Selects the file `fn` as the destination for generated speech and logs the
// choice at trace level 3.
420 BOOL
PTextToSpeech_Festival::OpenFile(const PFilePath
& fn
)
422 PWaitAndSignal
m(mutex
);
429 PTRACE(3, "TTS\tWriting speech to " << fn
);
// Closes the Festival engine. Calls Invoke(text, path) and keeps its result
// in `stat`.
// NOTE(review): `text`, `path` and `stat` are not declared on the lines shown
// here — presumably `text` and `path` are members holding the accumulated
// text and the output file chosen by OpenFile; confirm.
434 BOOL
PTextToSpeech_Festival::Close()
436 PWaitAndSignal
m(mutex
);
444 stat
= Invoke(text
, path
);
// Accepts `ostr` for speaking by the Festival engine. Logs at trace level 3
// when the engine is not open, when text is being spoken, and when stream
// mode is requested (which the message says is unsupported for Festival).
454 BOOL
PTextToSpeech_Festival::Speak(const PString
& ostr
, TextType hint
)
456 PWaitAndSignal
m(mutex
);
459 PTRACE(3, "TTS\tAttempt to speak whilst engine not open");
465 // do various things to the string, depending upon the hint
473 PTRACE(3, "TTS\tSpeaking " << ostr
);
478 PTRACE(3, "TTS\tStream mode not supported for Festival");
// Builds the voice list for the Festival engine; the only entry added is the
// literal "default".
483 PStringArray
PTextToSpeech_Festival::GetVoiceList()
485 PWaitAndSignal
m(mutex
);
487 PStringArray voiceList
;
489 voiceList
.AppendString("default");
// Voice selection for the Festival engine; `v` is the requested voice. The
// instance mutex is held for the duration of the call.
494 BOOL
PTextToSpeech_Festival::SetVoice(const PString
& v
)
496 PWaitAndSignal
m(mutex
);
501 BOOL
PTextToSpeech_Festival::SetRate(unsigned v
)
507 unsigned PTextToSpeech_Festival::GetRate()
512 BOOL
PTextToSpeech_Festival::SetVolume(unsigned v
)
518 unsigned PTextToSpeech_Festival::GetVolume()
// Renders `otext` into the file `fname` by running an external command of the
// form
//   echo "<text>" | ./text2wave -F <rate> -otype riff > <fname>
// through a PPipeChannel, then waits for it to terminate and logs whether
// generation succeeded.
523 BOOL
PTextToSpeech_Festival::Invoke(const PString
& otext
, const PFilePath
& fname
)
// Sanitise a copy of the text before embedding it in the command line:
// newlines become spaces, double quotes become single quotes, and
// backslashes become spaces.
// NOTE(review): the third argument TRUE is presumably "replace all
// occurrences" — confirm against PString::Replace.
525 PString text
= otext
;
526 text
.Replace('\n', ' ', TRUE
);
527 text
.Replace('\"', '\'', TRUE
);
528 text
.Replace('\\', ' ', TRUE
);
// Wrap the sanitised text in double quotes.
529 text
= "\"" + text
+ "\"";
// NOTE(review): the command is assembled by string concatenation and uses
// `|` and `>`, so it is presumably interpreted by a shell. Characters that
// stay special inside double quotes (for example `$` and backticks) are not
// filtered from `text`, and `fname` is appended unquoted — confirm that
// neither can carry untrusted input or spaces.
// NOTE(review): text2wave is addressed as ./text2wave, i.e. relative to the
// process's current directory.
531 PString cmdLine
= "echo " + text
+ " | ./text2wave -F " + PString(PString::Unsigned
, rate
) + " -otype riff > " + fname
;
542 if (!cmd
.Open(cmdLine
, PPipeChannel::ReadWriteStd
)) {
// NOTE(review): this streams `cmd` (the channel object) rather than
// `cmdLine` (the command text) — confirm which one the message was meant to
// show.
543 PTRACE(2, "TTS\tCannot execute command " << cmd
);
545 PTRACE(2, "TTS\tCreating " << fname
<< " using " << cmdLine
);
// Block until the child process exits and capture its termination code.
547 code
= cmd
.WaitForTermination();
549 PTRACE(2, "TTS\tdata generated");
551 PTRACE(2, "TTS\tgeneration failed");
// NOTE(review): presumably emits the single instantiation of the factory for
// PTextToSpeech that the engine registrations in this file rely on — confirm
// against the PINSTANTIATE_FACTORY macro definition.
560 PINSTANTIATE_FACTORY(PTextToSpeech
)
562 // End Of File ///////////////////////////////////////////////////////////////