4 * Text To Speech classes
6 * Portable Windows Library
8 * Copyright (c) 2002 Equivalence Pty. Ltd.
10 * The contents of this file are subject to the Mozilla Public License
11 * Version 1.0 (the "License"); you may not use this file except in
12 * compliance with the License. You may obtain a copy of the License at
13 * http://www.mozilla.org/MPL/
15 * Software distributed under the License is distributed on an "AS IS"
16 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
17 * the License for the specific language governing rights and limitations
20 * The Original Code is Portable Windows Library.
22 * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
24 * Contributor(s): ______________________________________.
27 * Revision 1.22 2007/09/08 11:34:28 rjongbloed
28 * Improved memory checking (leaks etc), especially when using MSVC debug library.
30 * Revision 1.21 2007/04/04 01:51:38 rjongbloed
31 * Reviewed and adjusted PTRACE log levels
32 * Now follows 1=error,2=warn,3=info,4+=debug
34 * Revision 1.20 2006/07/27 09:48:24 rjongbloed
35 * Fixed DevStudio 2005 compiler compatibility
37 * Revision 1.19 2006/06/21 03:28:44 csoutheren
38 * Various cleanups thanks to Frederic Heem
40 * Revision 1.18 2005/11/30 12:47:41 csoutheren
41 * Removed tabs, reformatted some code, and changed tags for Doxygen
43 * Revision 1.17 2005/01/04 08:09:42 csoutheren
44 * Fixed Linux configure problems
46 * Revision 1.16 2005/01/04 07:44:03 csoutheren
47 * More changes to implement the new configuration methodology, and also to
48 * attack the global static problem
50 * Revision 1.15 2004/10/23 10:56:15 ykiryanov
51 * Added ifdef _WIN32_WCE for PocketPC 2003 SDK port
53 * Revision 1.14 2004/07/12 09:17:20 csoutheren
54 * Fixed warnings and errors under Linux
56 * Revision 1.13 2004/07/06 10:12:54 csoutheren
57 * Added static integer to factory template to assist in ensuring factories are instantiated
59 * Revision 1.12 2004/06/30 12:17:05 rjongbloed
60 * Rewrite of plug in system to use single global variable for all factories to avoid all sorts
61 * of issues with startup orders and Windows DLL multiple instances.
63 * Revision 1.11 2004/06/19 09:02:32 csoutheren
64 * Added rate and volume to keep Festival happy
66 * Revision 1.10 2004/06/19 07:18:59 csoutheren
67 * Change TTS engine registration to use abstract factory code
69 * Revision 1.9 2004/04/09 06:52:17 rjongbloed
70 * Removed #pragma linker command for /delayload of DLL as documentation says that
73 * Revision 1.8 2004/02/23 23:52:20 csoutheren
74 * Added pragmas to avoid every Windows application needing to include libs explicitly
76 * Revision 1.7 2003/10/30 11:28:25 rjongbloed
77 * Added directory name for Speech API under Windows.
79 * Revision 1.6 2003/09/17 06:02:13 csoutheren
80 * Fixed windows header file problem caused by removing recursive headers
82 * Revision 1.5 2003/04/17 01:21:33 craigs
83 * Added import of ptbuildopts to correctly detect if P_SAPI is set
85 * Revision 1.4 2003/04/16 08:00:19 robertj
86 * Windows pseudo autoconf support
88 * Revision 1.3 2002/11/06 22:47:25 robertj
89 * Fixed header comment (copyright etc)
91 * Revision 1.2 2002/08/14 15:18:25 craigs
92 * Fixed Festival implementation
94 * Revision 1.1 2002/08/06 04:45:58 craigs
100 #pragma implementation "ptts.h"
103 #include "ptbuildopts.h"
105 ////////////////////////////////////////////////////////////
106 #include <ptlib/pfactory.h>
107 #include <ptclib/ptts.h>
109 PINSTANTIATE_FACTORY(PTextToSpeech
, PString
)
111 // WIN32 COM stuff must be first in file to compile properly
115 #if defined(P_SAPI_LIBRARY)
116 #pragma comment(lib, P_SAPI_LIBRARY)
120 #define _WIN32_DCOM 1
127 #include <windowsx.h>
128 #include <sphelper.h>
132 ////////////////////////////////////////////////////////////
134 // this disables the winsock2 stuff in the Windows contain.h, to avoid header file problems
135 #define P_KNOCKOUT_WINSOCK2
138 #include <ptlib/pipechan.h>
139 #include <ptclib/ptts.h>
142 ////////////////////////////////////////////////////////////
144 // Text to speech using Microsoft's Speech API (SAPI)
145 // Can be downloaded from http://www.microsoft.com/speech/download/sdk51
150 #define MAX_FN_SIZE 1024
// Text-to-speech engine class derived from PTextToSpeech. Its data members
// hold CComPtr smart pointers to the SAPI ISpVoice and ISpStream COM
// interfaces, plus the configured rate and volume.
152 class PTextToSpeech_SAPI
: public PTextToSpeech
154 PCLASSINFO(PTextToSpeech_SAPI
, PTextToSpeech
);
156 PTextToSpeech_SAPI();
157 ~PTextToSpeech_SAPI();
// Voice enumeration and selection.
160 PStringArray
GetVoiceList();
161 BOOL
SetVoice(const PString
& voice
);
// Rate and volume accessors.
163 BOOL
SetRate(unsigned rate
);
166 BOOL
SetVolume(unsigned volume
);
167 unsigned GetVolume();
// Output selection: a file path or a PChannel.
169 BOOL
OpenFile (const PFilePath
& fn
);
170 BOOL
OpenChannel(PChannel
* channel
);
// Reports the `opened` flag that the Open* methods assign.
171 BOOL
IsOpen() { return opened
; }
// Speaks `str`; `hint` describes the kind of text being passed.
174 BOOL
Speak (const PString
& str
, TextType hint
);
// Class-wide state: the constructor and destructor lock refMutex while they
// test and decrement the counter that refCount points to.
179 static PMutex refMutex
;
180 static int * refCount
;
// COM interface pointers: the voice object and the wave-file output stream.
183 CComPtr
<ISpVoice
> m_cpVoice
;
184 CComPtr
<ISpStream
> cpWavStream
;
187 unsigned rate
, volume
;
// Registers PTextToSpeech_SAPI with the PTextToSpeech factory under the key
// "Microsoft SAPI".
// NOTE(review): the meaning of the second constructor argument (false) is
// defined by PFactory::Worker and is not visible here - confirm.
191 PFactory
<PTextToSpeech
>::Worker
<PTextToSpeech_SAPI
> sapiTTSFactory("Microsoft SAPI", false);
// Out-of-class definitions of the static members. As objects with static
// storage duration they are zero-initialised, so refCount starts out NULL.
193 int * PTextToSpeech_SAPI::refCount
;
194 PMutex
PTextToSpeech_SAPI::refMutex
;
// Constructor. Holds the class-wide refMutex while checking the shared
// refCount pointer, initialises COM, and marks the engine as closed.
200 PTextToSpeech_SAPI::PTextToSpeech_SAPI()
202 PWaitAndSignal
m(refMutex
);
// refCount is still NULL when no earlier instance has set it up.
204 if (refCount
== NULL
) {
// Initialise COM for the calling thread with the multithreaded concurrency model.
// NOTE(review): presumably this only runs on the first-instance path - confirm.
207 ::CoInitializeEx(NULL
, COINIT_MULTITHREADED
);
// Start with no file output selected and the engine not open.
212 usingFile
= opened
= FALSE
;
// Destructor. Under refMutex, decrements the shared instance counter; the
// branch taken when it reaches zero handles the last-instance case.
// NOTE(review): presumably that branch releases COM and the counter - confirm.
216 PTextToSpeech_SAPI::~PTextToSpeech_SAPI()
218 PWaitAndSignal
m(refMutex
);
220 if ((--(*refCount
)) == 0) {
// Creates the SAPI voice COM object (CLSID_SpVoice) into m_cpVoice.
// Stores the outcome in `opened` and returns it: TRUE when the
// CoCreateInstance call succeeded, FALSE otherwise.
227 BOOL
PTextToSpeech_SAPI::OpenVoice()
229 PWaitAndSignal
m(mutex
);
231 HRESULT hr
= m_cpVoice
.CoCreateInstance(CLSID_SpVoice
);
232 return (opened
= SUCCEEDED(hr
));
// Channel output: the PChannel argument is unnamed and therefore unused.
// The visible return statement clears `opened` and returns FALSE.
235 BOOL
PTextToSpeech_SAPI::OpenChannel(PChannel
*)
237 PWaitAndSignal
m(mutex
);
241 return (opened
= FALSE
);
// Directs the voice output to a wave file.
//   fn - path of the file to create.
// Returns TRUE (and sets `opened`) when SetOutput on the voice succeeds.
245 BOOL
PTextToSpeech_SAPI::OpenFile(const PFilePath
& fn
)
247 PWaitAndSignal
m(mutex
);
// Output format: the SPSF_8kHz16BitMono stream format.
255 CSpStreamFormat wavFormat
;
256 wavFormat
.AssignFormat(SPSF_8kHz16BitMono
);
// Wide-character copy of the file name in a fixed buffer of MAX_FN_SIZE WCHARs.
258 WCHAR szwWavFileName
[MAX_FN_SIZE
] = L
"";;
// NOTE(review): wcscpy performs no bounds check; no length test against
// MAX_FN_SIZE is visible here - confirm over-long paths cannot reach this.
261 wcscpy(szwWavFileName
, T2W((const char *)fn
));
// Bind cpWavStream to the file; SPFM_CREATE_ALWAYS is the file-open mode.
262 HRESULT hr
= SPBindToFile(szwWavFileName
, SPFM_CREATE_ALWAYS
, &cpWavStream
, &wavFormat
.FormatId(), wavFormat
.WaveFormatExPtr());
// On failure, drop any stream reference.
264 if (!SUCCEEDED(hr
)) {
265 cpWavStream
.Release();
// Route the voice's audio into the file stream.
269 hr
= m_cpVoice
->SetOutput(cpWavStream
, TRUE
);
271 return (opened
= SUCCEEDED(hr
));
// Closes the engine: blocks until the voice has finished speaking (INFINITE
// timeout), then releases the wave-file stream reference.
274 BOOL
PTextToSpeech_SAPI::Close()
276 PWaitAndSignal
m(mutex
);
283 m_cpVoice
->WaitUntilDone(INFINITE
);
284 cpWavStream
.Release();
// Speaks a string through the SAPI voice.
//   otext - text to speak; copied into a local so it can be adjusted.
//   hint  - kind of text, used to pre-process the string.
// Returns TRUE when ISpVoice::Speak reports success.
296 BOOL
PTextToSpeech_SAPI::Speak(const PString
& otext
, TextType hint
)
298 PWaitAndSignal
m(mutex
);
303 PString text
= otext
;
305 // do various things to the string, depending upon the hint
316 // quick hack to calculate length of Unicode string
// Heap buffer of one WCHAR per character of `text`, plus a terminator.
// NOTE(review): no matching delete[] for uStr is visible in this excerpt -
// confirm the buffer is released on every return path.
317 WCHAR
* uStr
= new WCHAR
[text
.GetLength()+1];
320 wcscpy(uStr
, T2W((const char *)text
));
// SPF_DEFAULT flags, no stream-number output requested (NULL).
322 HRESULT hr
= m_cpVoice
->Speak(uStr
, SPF_DEFAULT
, NULL
);
326 return SUCCEEDED(hr
);
// Enumerates the SAPI voice tokens in the SPCAT_VOICES category and builds a
// PStringArray from them.
329 PStringArray
PTextToSpeech_SAPI::GetVoiceList()
331 PWaitAndSignal
m(mutex
);
333 PStringArray voiceList
;
335 CComPtr
<ISpObjectToken
> cpVoiceToken
;
336 CComPtr
<IEnumSpObjectTokens
> cpEnum
;
339 //Enumerate the available voices
340 HRESULT hr
= SpEnumTokens(SPCAT_VOICES
, NULL
, NULL
, &cpEnum
);
342 // Get the number of voices
344 hr
= cpEnum
->GetCount(&ulCount
);
346 // Obtain a list of available voice tokens, set the voice to the token, and call Speak
// Loop runs while calls keep succeeding and tokens remain to be fetched.
347 while (SUCCEEDED(hr
) && ulCount
--) {
// Drop the previous token before fetching the next one.
349 cpVoiceToken
.Release();
352 hr
= cpEnum
->Next(1, &cpVoiceToken
, NULL
);
// NOTE(review): this appends the literal string "voice", not a name taken
// from the token - looks like a placeholder; confirm.
355 voiceList
.AppendString("voice");
362 BOOL
PTextToSpeech_SAPI::SetVoice(const PString
& v
)
364 PWaitAndSignal
m(mutex
);
369 BOOL
PTextToSpeech_SAPI::SetRate(unsigned v
)
375 unsigned PTextToSpeech_SAPI::GetRate()
380 BOOL
PTextToSpeech_SAPI::SetVolume(unsigned v
)
386 unsigned PTextToSpeech_SAPI::GetVolume()
394 ////////////////////////////////////////////////////////////
396 // Generic text to speech using Festival
// Text-to-speech engine class derived from PTextToSpeech that produces speech
// by running an external command (see Invoke).
401 class PTextToSpeech_Festival
: public PTextToSpeech
403 PCLASSINFO(PTextToSpeech_Festival
, PTextToSpeech
);
405 PTextToSpeech_Festival();
406 ~PTextToSpeech_Festival();
// Voice enumeration and selection.
409 PStringArray
GetVoiceList();
410 BOOL
SetVoice(const PString
& voice
);
// Rate and volume accessors.
412 BOOL
SetRate(unsigned rate
);
415 BOOL
SetVolume(unsigned volume
);
416 unsigned GetVolume();
// Output selection: a file path or a PChannel.
418 BOOL
OpenFile (const PFilePath
& fn
);
419 BOOL
OpenChannel(PChannel
* channel
);
// Reports the `opened` flag.
420 BOOL
IsOpen() { return opened
; }
// Speaks `str`; `hint` describes the kind of text being passed.
423 BOOL
Speak (const PString
& str
, TextType hint
);
// Runs the external synthesis command for `str`, writing audio to `fn`.
426 BOOL
Invoke(const PString
& str
, const PFilePath
& fn
);
433 unsigned volume
, rate
;
// Registers PTextToSpeech_Festival with the PTextToSpeech factory under the
// key "Festival".
// NOTE(review): the meaning of the second constructor argument (false) is
// defined by PFactory::Worker and is not visible here - confirm.
439 PFactory
<PTextToSpeech
>::Worker
<PTextToSpeech_Festival
> festivalTTSFactory("Festival", false);
// Constructor. Under the instance mutex, marks the engine as not open and not
// writing to a file.
441 PTextToSpeech_Festival::PTextToSpeech_Festival()
443 PWaitAndSignal
m(mutex
);
444 usingFile
= opened
= FALSE
;
// Destructor. Acquires the instance mutex for the duration of destruction;
// no other visible work.
450 PTextToSpeech_Festival::~PTextToSpeech_Festival()
452 PWaitAndSignal
m(mutex
);
// Channel output: the PChannel argument is unnamed and therefore unused.
// NOTE(review): Speak logs "Stream mode not supported for Festival", so this
// presumably cannot deliver audio to a channel - confirm against the body.
455 BOOL
PTextToSpeech_Festival::OpenChannel(PChannel
*)
457 PWaitAndSignal
m(mutex
);
// Selects file output.
//   fn - destination path; it is written to the trace log at level 3.
467 BOOL
PTextToSpeech_Festival::OpenFile(const PFilePath
& fn
)
469 PWaitAndSignal
m(mutex
);
476 PTRACE(3, "TTS\tWriting speech to " << fn
);
// Closes the engine. The visible statement calls Invoke(text, path) and keeps
// its result in `stat`, so the external command is run from here.
// NOTE(review): `text` and `path` are presumably members filled in by
// Speak/OpenFile - confirm.
481 BOOL
PTextToSpeech_Festival::Close()
483 PWaitAndSignal
m(mutex
);
491 stat
= Invoke(text
, path
);
// Accepts text to be spoken.
//   ostr - the text; logged at trace level 3.
//   hint - kind of text, used to pre-process the string.
// Emits a level-2 trace when called while the engine is not open, and a
// level-1 trace for the unsupported stream (non-file) mode.
501 BOOL
PTextToSpeech_Festival::Speak(const PString
& ostr
, TextType hint
)
503 PWaitAndSignal
m(mutex
);
506 PTRACE(2, "TTS\tAttempt to speak whilst engine not open");
512 // do various things to the string, depending upon the hint
520 PTRACE(3, "TTS\tSpeaking " << ostr
);
525 PTRACE(1, "TTS\tStream mode not supported for Festival");
// Builds the list of voices: the single entry "default" is appended to a
// local PStringArray.
530 PStringArray
PTextToSpeech_Festival::GetVoiceList()
532 PWaitAndSignal
m(mutex
);
534 PStringArray voiceList
;
536 voiceList
.AppendString("default");
541 BOOL
PTextToSpeech_Festival::SetVoice(const PString
& v
)
543 PWaitAndSignal
m(mutex
);
548 BOOL
PTextToSpeech_Festival::SetRate(unsigned v
)
554 unsigned PTextToSpeech_Festival::GetRate()
559 BOOL
PTextToSpeech_Festival::SetVolume(unsigned v
)
565 unsigned PTextToSpeech_Festival::GetVolume()
// Runs the external text2wave command to synthesise speech into a file.
//   otext - text to speak; sanitised on a local copy before use.
//   fname - output file that the command line redirects into.
// Traces "data generated" (level 4) or "generation failed" (level 1)
// after waiting for the command to terminate.
570 BOOL
PTextToSpeech_Festival::Invoke(const PString
& otext
, const PFilePath
& fname
)
572 PString text
= otext
;
// Sanitise for the command line: newlines become spaces, double quotes become
// single quotes, backslashes become spaces; then wrap the text in double quotes.
// NOTE(review): characters such as ` and $ are left untouched and remain
// active inside double quotes in a POSIX shell - confirm callers pass only
// trusted text.
573 text
.Replace('\n', ' ', TRUE
);
574 text
.Replace('\"', '\'', TRUE
);
575 text
.Replace('\\', ' ', TRUE
);
576 text
= "\"" + text
+ "\"";
// Pipeline: echo <text> | ./text2wave -F <rate> -otype riff > <fname>
// text2wave is looked up relative to the current directory ("./").
// NOTE(review): fname is appended unquoted - confirm paths containing spaces
// or shell metacharacters cannot reach this.
578 PString cmdLine
= "echo " + text
+ " | ./text2wave -F " + PString(PString::Unsigned
, rate
) + " -otype riff > " + fname
;
592 if (!cmd
.Open(cmdLine
, PPipeChannel::ReadWriteStd
)) {
// NOTE(review): this streams `cmd` (the pipe object), not `cmdLine` - confirm
// which was intended for the log message.
593 PTRACE(1, "TTS\tCannot execute command " << cmd
);
595 PTRACE(3, "TTS\tCreating " << fname
<< " using " << cmdLine
);
// Block until the child process exits and keep its exit code.
597 code
= cmd
.WaitForTermination();
599 PTRACE(4, "TTS\tdata generated");
601 PTRACE(1, "TTS\tgeneration failed");
610 // End Of File ///////////////////////////////////////////////////////////////