Fixed incorrect usage of result (now object rather than scalar), thanks Michal Zygmun...
[pwlib.git] / src / ptclib / ptts.cxx
blobcb6663506fbe44020223cda2a08d73e9e47b0088
1 /*
2 * ptts.cxx
4 * Text To Speech classes
6 * Portable Windows Library
8 * Copyright (c) 2002 Equivalence Pty. Ltd.
10 * The contents of this file are subject to the Mozilla Public License
11 * Version 1.0 (the "License"); you may not use this file except in
12 * compliance with the License. You may obtain a copy of the License at
13 * http://www.mozilla.org/MPL/
15 * Software distributed under the License is distributed on an "AS IS"
16 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
17 * the License for the specific language governing rights and limitations
18 * under the License.
20 * The Original Code is Portable Windows Library.
22 * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
24 * Contributor(s): ______________________________________.
26 * $Log$
27 * Revision 1.11 2004/06/19 09:02:32 csoutheren
28 * Added rate and volume to keep Festival happy
30 * Revision 1.10 2004/06/19 07:18:59 csoutheren
31 * Change TTS engine registration to use abstract factory code
33 * Revision 1.9 2004/04/09 06:52:17 rjongbloed
34 * Removed #pragma linker command for /delayload of DLL as the documentation says that
35 * you cannot do this.
37 * Revision 1.8 2004/02/23 23:52:20 csoutheren
38 * Added pragmas to avoid every Windows application needing to include libs explicitly
40 * Revision 1.7 2003/10/30 11:28:25 rjongbloed
41 * Added directory name for Speech API under Windows.
43 * Revision 1.6 2003/09/17 06:02:13 csoutheren
44 * Fixed windows header file problem caused by removing recursive headers
46 * Revision 1.5 2003/04/17 01:21:33 craigs
47 * Added include of ptbuildopts to correctly detect if P_SAPI is set
49 * Revision 1.4 2003/04/16 08:00:19 robertj
50 * Windows pseudo autoconf support
52 * Revision 1.3 2002/11/06 22:47:25 robertj
53 * Fixed header comment (copyright etc)
55 * Revision 1.2 2002/08/14 15:18:25 craigs
56 * Fixed Festival implementation
58 * Revision 1.1 2002/08/06 04:45:58 craigs
59 * Initial version
63 #ifdef __GNUC__
64 #pragma implementation "ptts.h"
65 #endif
67 #include "ptbuildopts.h"
70 ////////////////////////////////////////////////////////////
72 // WIN32 COM stuff must be first in file to compile properly
74 #if P_SAPI
76 #if defined(P_SAPI_LIBRARY)
77 #pragma comment(lib, P_SAPI_LIBRARY)
78 #endif
80 #ifndef _WIN32_DCOM
81 #define _WIN32_DCOM 1
82 #endif
84 #include <objbase.h>
85 #include <atlbase.h>
86 #include <objbase.h>
87 #include <windows.h>
88 #include <windowsx.h>
89 #include <sphelper.h>
91 #endif
93 ////////////////////////////////////////////////////////////
95 // this disables the winsock2 stuff in the Windows contain.h, to avoid header file problems
96 #define P_KNOCKOUT_WINSOCK2
98 #include <ptlib.h>
99 #include <ptlib/pipechan.h>
100 #include <ptclib/ptts.h>
102 ////////////////////////////////////////////////////////////
104 // Text to speech using Microsoft's Speech API (SAPI)
105 // Can be downloaded from http://www.microsoft.com/speech/download/sdk51
108 #if P_SAPI
// Upper bound, in wide characters, for file names handled by the SAPI engine.
#define MAX_FN_SIZE 1024
112 class PTextToSpeech_SAPI : public PTextToSpeech
114 PCLASSINFO(PTextToSpeech_SAPI, PTextToSpeech);
115 public:
116 PTextToSpeech_SAPI();
117 ~PTextToSpeech_SAPI();
119 // overrides
120 PStringArray GetVoiceList();
121 BOOL SetVoice(const PString & voice);
123 BOOL SetRate(unsigned rate);
124 unsigned GetRate();
126 BOOL SetVolume(unsigned volume);
127 unsigned GetVolume();
129 BOOL OpenFile (const PFilePath & fn);
130 BOOL OpenChannel(PChannel * channel);
131 BOOL IsOpen() { return opened; }
133 BOOL Close ();
134 BOOL Speak (const PString & str, TextType hint);
136 protected:
137 BOOL OpenVoice();
139 static PMutex refMutex;
140 static int * refCount;
142 PMutex mutex;
143 CComPtr<ISpVoice> m_cpVoice;
144 CComPtr<ISpStream> cpWavStream;
145 BOOL opened;
146 BOOL usingFile;
147 unsigned rate, volume;
148 PString voice;
151 static PAbstractFactory<PTextToSpeech, PTextToSpeech_SAPI> sapiTTSFactory("Microsoft SAPI");
153 int * PTextToSpeech_SAPI::refCount;
154 PMutex PTextToSpeech_SAPI::refMutex;
157 PTextToSpeech_SAPI::PTextToSpeech_SAPI()
159 PWaitAndSignal m(refMutex);
161 if (refCount == NULL) {
162 refCount = new int;
163 *refCount = 1;
164 ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
165 } else {
166 (*refCount)++;
169 usingFile = opened = FALSE;
173 PTextToSpeech_SAPI::~PTextToSpeech_SAPI()
175 PWaitAndSignal m(refMutex);
177 if ((--(*refCount)) == 0) {
178 ::CoUninitialize();
179 delete refCount;
180 refCount = NULL;
184 BOOL PTextToSpeech_SAPI::OpenVoice()
186 PWaitAndSignal m(mutex);
188 HRESULT hr = m_cpVoice.CoCreateInstance(CLSID_SpVoice);
189 return (opened = SUCCEEDED(hr));
192 BOOL PTextToSpeech_SAPI::OpenChannel(PChannel *)
194 PWaitAndSignal m(mutex);
196 Close();
197 usingFile = FALSE;
198 return (opened = FALSE);
202 BOOL PTextToSpeech_SAPI::OpenFile(const PFilePath & fn)
204 PWaitAndSignal m(mutex);
206 Close();
207 usingFile = TRUE;
209 if (!OpenVoice())
210 return FALSE;
212 CSpStreamFormat wavFormat;
213 wavFormat.AssignFormat(SPSF_8kHz16BitMono);
215 WCHAR szwWavFileName[MAX_FN_SIZE] = L"";;
217 USES_CONVERSION;
218 wcscpy(szwWavFileName, T2W((const char *)fn));
219 HRESULT hr = SPBindToFile(szwWavFileName, SPFM_CREATE_ALWAYS, &cpWavStream, &wavFormat.FormatId(), wavFormat.WaveFormatExPtr());
221 if (!SUCCEEDED(hr)) {
222 cpWavStream.Release();
223 return FALSE;
226 hr = m_cpVoice->SetOutput(cpWavStream, TRUE);
228 return (opened = SUCCEEDED(hr));
231 BOOL PTextToSpeech_SAPI::Close()
233 PWaitAndSignal m(mutex);
235 if (!opened)
236 return TRUE;
238 if (usingFile) {
239 if (opened)
240 m_cpVoice->WaitUntilDone(INFINITE);
241 cpWavStream.Release();
244 if (opened)
245 m_cpVoice.Release();
247 opened = FALSE;
249 return TRUE;
253 BOOL PTextToSpeech_SAPI::Speak(const PString & otext, TextType hint)
255 PWaitAndSignal m(mutex);
257 if (!IsOpen())
258 return FALSE;
260 PString text = otext;
262 // do various things to the string, depending upon the hint
263 switch (hint) {
264 case Digits:
267 break;
269 default:
273 // quick hack to calculate length of Unicode string
274 unsigned short * uStr = new unsigned short[text.GetLength()+1];
276 USES_CONVERSION;
277 wcscpy(uStr, T2W((const char *)text));
279 HRESULT hr = m_cpVoice->Speak(uStr, SPF_DEFAULT, NULL);
281 delete[] uStr;
283 return SUCCEEDED(hr);
286 PStringArray PTextToSpeech_SAPI::GetVoiceList()
288 PWaitAndSignal m(mutex);
290 PStringArray voiceList;
292 CComPtr<ISpObjectToken> cpVoiceToken;
293 CComPtr<IEnumSpObjectTokens> cpEnum;
294 ULONG ulCount = 0;
296 //Enumerate the available voices
297 HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
299 // Get the number of voices
300 if (SUCCEEDED(hr))
301 hr = cpEnum->GetCount(&ulCount);
303 // Obtain a list of available voice tokens, set the voice to the token, and call Speak
304 while (SUCCEEDED(hr) && ulCount--) {
306 cpVoiceToken.Release();
308 if (SUCCEEDED(hr))
309 hr = cpEnum->Next(1, &cpVoiceToken, NULL );
311 if (SUCCEEDED(hr)) {
312 voiceList.AppendString("voice");
316 return voiceList;
319 BOOL PTextToSpeech_SAPI::SetVoice(const PString & v)
321 PWaitAndSignal m(mutex);
322 voice = v;
323 return TRUE;
326 BOOL PTextToSpeech_SAPI::SetRate(unsigned v)
328 rate = v;
329 return TRUE;
332 unsigned PTextToSpeech_SAPI::GetRate()
334 return rate;
337 BOOL PTextToSpeech_SAPI::SetVolume(unsigned v)
339 volume = v;
340 return TRUE;
343 unsigned PTextToSpeech_SAPI::GetVolume()
345 return volume;
348 #endif
349 // P_SAPI
351 ////////////////////////////////////////////////////////////
353 // Generic text to speech using Festival
356 class PTextToSpeech_Festival : public PTextToSpeech
358 PCLASSINFO(PTextToSpeech_Festival, PTextToSpeech);
359 public:
360 PTextToSpeech_Festival();
361 ~PTextToSpeech_Festival();
363 // overrides
364 PStringArray GetVoiceList();
365 BOOL SetVoice(const PString & voice);
367 BOOL SetRate(unsigned rate);
368 unsigned GetRate();
370 BOOL SetVolume(unsigned volume);
371 unsigned GetVolume();
373 BOOL OpenFile (const PFilePath & fn);
374 BOOL OpenChannel(PChannel * channel);
375 BOOL IsOpen() { return opened; }
377 BOOL Close ();
378 BOOL Speak (const PString & str, TextType hint);
380 protected:
381 BOOL Invoke(const PString & str, const PFilePath & fn);
383 PMutex mutex;
384 BOOL opened;
385 BOOL usingFile;
386 PString text;
387 PFilePath path;
388 unsigned volume, rate;
389 PString voice;
392 static PAbstractFactory<PTextToSpeech, PTextToSpeech_Festival> festivalTTSFactory("Festival");
394 PTextToSpeech_Festival::PTextToSpeech_Festival()
396 PWaitAndSignal m(mutex);
397 usingFile = opened = FALSE;
398 rate = 8000;
399 volume = 100;
403 PTextToSpeech_Festival::~PTextToSpeech_Festival()
405 PWaitAndSignal m(mutex);
408 BOOL PTextToSpeech_Festival::OpenChannel(PChannel *)
410 PWaitAndSignal m(mutex);
412 Close();
413 usingFile = FALSE;
414 opened = FALSE;
416 return TRUE;
420 BOOL PTextToSpeech_Festival::OpenFile(const PFilePath & fn)
422 PWaitAndSignal m(mutex);
424 Close();
425 usingFile = TRUE;
426 path = fn;
427 opened = TRUE;
429 PTRACE(3, "TTS\tWriting speech to " << fn);
431 return TRUE;
434 BOOL PTextToSpeech_Festival::Close()
436 PWaitAndSignal m(mutex);
438 if (!opened)
439 return TRUE;
441 BOOL stat = FALSE;
443 if (usingFile)
444 stat = Invoke(text, path);
446 text = PString();
448 opened = FALSE;
450 return stat;
454 BOOL PTextToSpeech_Festival::Speak(const PString & ostr, TextType hint)
456 PWaitAndSignal m(mutex);
458 if (!IsOpen()) {
459 PTRACE(3, "TTS\tAttempt to speak whilst engine not open");
460 return FALSE;
463 PString str = ostr;
465 // do various things to the string, depending upon the hint
466 switch (hint) {
467 case Digits:
468 default:
472 if (usingFile) {
473 PTRACE(3, "TTS\tSpeaking " << ostr);
474 text = text & str;
475 return TRUE;
478 PTRACE(3, "TTS\tStream mode not supported for Festival");
480 return FALSE;
483 PStringArray PTextToSpeech_Festival::GetVoiceList()
485 PWaitAndSignal m(mutex);
487 PStringArray voiceList;
489 voiceList.AppendString("default");
491 return voiceList;
494 BOOL PTextToSpeech_Festival::SetVoice(const PString & v)
496 PWaitAndSignal m(mutex);
497 voice = v;
498 return TRUE;
501 BOOL PTextToSpeech_Festival::SetRate(unsigned v)
503 rate = v;
504 return TRUE;
507 unsigned PTextToSpeech_Festival::GetRate()
509 return rate;
512 BOOL PTextToSpeech_Festival::SetVolume(unsigned v)
514 volume = v;
515 return TRUE;
518 unsigned PTextToSpeech_Festival::GetVolume()
520 return volume;
523 BOOL PTextToSpeech_Festival::Invoke(const PString & otext, const PFilePath & fname)
525 PString text = otext;
526 text.Replace('\n', ' ', TRUE);
527 text.Replace('\"', '\'', TRUE);
528 text.Replace('\\', ' ', TRUE);
529 text = "\"" + text + "\"";
531 PString cmdLine = "echo " + text + " | ./text2wave -F " + PString(PString::Unsigned, rate) + " -otype riff > " + fname;
533 #if 1
535 system(cmdLine);
536 return TRUE;
538 #else
540 PPipeChannel cmd;
541 int code = -1;
542 if (!cmd.Open(cmdLine, PPipeChannel::ReadWriteStd)) {
543 PTRACE(2, "TTS\tCannot execute command " << cmd);
544 } else {
545 PTRACE(2, "TTS\tCreating " << fname << " using " << cmdLine);
546 cmd.Execute();
547 code = cmd.WaitForTermination();
548 if (code >= 0) {
549 PTRACE(2, "TTS\tdata generated");
550 } else {
551 PTRACE(2, "TTS\tgeneration failed");
555 return code == 0;
557 #endif
560 PINSTANTIATE_FACTORY(PTextToSpeech)
562 // End Of File ///////////////////////////////////////////////////////////////