Fixed DevStudio 2003 build with memory check code.
[pwlib.git] / src / ptclib / ptts.cxx
blob9141cdff17e44f66ae46c3eb8af76471f116a836
1 /*
2 * ptts.cxx
4 * Text To Speech classes
6 * Portable Windows Library
8 * Copyright (c) 2002 Equivalence Pty. Ltd.
10 * The contents of this file are subject to the Mozilla Public License
11 * Version 1.0 (the "License"); you may not use this file except in
12 * compliance with the License. You may obtain a copy of the License at
13 * http://www.mozilla.org/MPL/
15 * Software distributed under the License is distributed on an "AS IS"
16 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
17 * the License for the specific language governing rights and limitations
18 * under the License.
20 * The Original Code is Portable Windows Library.
22 * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
24 * Contributor(s): ______________________________________.
26 * $Log$
27 * Revision 1.22 2007/09/08 11:34:28 rjongbloed
28 * Improved memory checking (leaks etc), especially when using MSVC debug library.
30 * Revision 1.21 2007/04/04 01:51:38 rjongbloed
31 * Reviewed and adjusted PTRACE log levels
32 * Now follows 1=error,2=warn,3=info,4+=debug
34 * Revision 1.20 2006/07/27 09:48:24 rjongbloed
35 * Fixed DevStudio 2005 compiler compatibility
37 * Revision 1.19 2006/06/21 03:28:44 csoutheren
38 * Various cleanups thanks for Frederic Heem
40 * Revision 1.18 2005/11/30 12:47:41 csoutheren
41 * Removed tabs, reformatted some code, and changed tags for Doxygen
43 * Revision 1.17 2005/01/04 08:09:42 csoutheren
44 * Fixed Linux configure problems
46 * Revision 1.16 2005/01/04 07:44:03 csoutheren
47 * More changes to implement the new configuration methodology, and also to
48 * attack the global static problem
50 * Revision 1.15 2004/10/23 10:56:15 ykiryanov
51 * Added ifdef _WIN32_WCE for PocketPC 2003 SDK port
53 * Revision 1.14 2004/07/12 09:17:20 csoutheren
54 * Fixed warnings and errors under Linux
56 * Revision 1.13 2004/07/06 10:12:54 csoutheren
57 * Added static integer o factory template to assist in ensuring factories are instantiated
59 * Revision 1.12 2004/06/30 12:17:05 rjongbloed
60 * Rewrite of plug in system to use single global variable for all factories to avoid all sorts
61 * of issues with startup orders and Windows DLL multiple instances.
63 * Revision 1.11 2004/06/19 09:02:32 csoutheren
64 * Added rate and volume to keep Festival happy
66 * Revision 1.10 2004/06/19 07:18:59 csoutheren
67 * Change TTS engine registration to use abstract factory code
69 * Revision 1.9 2004/04/09 06:52:17 rjongbloed
70 * Removed #pargma linker command for /delayload of DLL as documentations sais that
71 * you cannot do this.
73 * Revision 1.8 2004/02/23 23:52:20 csoutheren
74 * Added pragmas to avoid every Windows application needing to include libs explicitly
76 * Revision 1.7 2003/10/30 11:28:25 rjongbloed
77 * Added dircetory name for Speech API under Windows.
79 * Revision 1.6 2003/09/17 06:02:13 csoutheren
80 * Fixed windows header file problem caused by removing recursive headers
82 * Revision 1.5 2003/04/17 01:21:33 craigs
83 * Added import of pybuildopts to correctly detect if P_SAPI is set
85 * Revision 1.4 2003/04/16 08:00:19 robertj
86 * Windoes psuedo autoconf support
88 * Revision 1.3 2002/11/06 22:47:25 robertj
89 * Fixed header comment (copyright etc)
91 * Revision 1.2 2002/08/14 15:18:25 craigs
92 * Fixed Festval implementation
94 * Revision 1.1 2002/08/06 04:45:58 craigs
95 * Initial version
99 #ifdef __GNUC__
100 #pragma implementation "ptts.h"
101 #endif
103 #include "ptbuildopts.h"
105 ////////////////////////////////////////////////////////////
106 #include <ptlib/pfactory.h>
107 #include <ptclib/ptts.h>
109 PINSTANTIATE_FACTORY(PTextToSpeech, PString)
111 // WIN32 COM stuff must be first in file to compile properly
113 #if P_SAPI
115 #if defined(P_SAPI_LIBRARY)
116 #pragma comment(lib, P_SAPI_LIBRARY)
117 #endif
119 #ifndef _WIN32_DCOM
120 #define _WIN32_DCOM 1
121 #endif
123 #include <objbase.h>
124 #include <atlbase.h>
125 #include <objbase.h>
126 #include <windows.h>
127 #include <windowsx.h>
128 #include <sphelper.h>
130 #endif
132 ////////////////////////////////////////////////////////////
134 // this disables the winsock2 stuff in the Windows contain.h, to avoid header file problems
135 #define P_KNOCKOUT_WINSOCK2
137 #include <ptlib.h>
138 #include <ptlib/pipechan.h>
139 #include <ptclib/ptts.h>
142 ////////////////////////////////////////////////////////////
144 // Text to speech using Microsoft's Speech API (SAPI)
145 // Can be downloaded from http://www.microsoft.com/speech/download/sdk51
148 #if P_SAPI
150 #define MAX_FN_SIZE 1024
152 class PTextToSpeech_SAPI : public PTextToSpeech
154 PCLASSINFO(PTextToSpeech_SAPI, PTextToSpeech);
155 public:
156 PTextToSpeech_SAPI();
157 ~PTextToSpeech_SAPI();
159 // overrides
160 PStringArray GetVoiceList();
161 BOOL SetVoice(const PString & voice);
163 BOOL SetRate(unsigned rate);
164 unsigned GetRate();
166 BOOL SetVolume(unsigned volume);
167 unsigned GetVolume();
169 BOOL OpenFile (const PFilePath & fn);
170 BOOL OpenChannel(PChannel * channel);
171 BOOL IsOpen() { return opened; }
173 BOOL Close ();
174 BOOL Speak (const PString & str, TextType hint);
176 protected:
177 BOOL OpenVoice();
179 static PMutex refMutex;
180 static int * refCount;
182 PMutex mutex;
183 CComPtr<ISpVoice> m_cpVoice;
184 CComPtr<ISpStream> cpWavStream;
185 BOOL opened;
186 BOOL usingFile;
187 unsigned rate, volume;
188 PString voice;
191 PFactory<PTextToSpeech>::Worker<PTextToSpeech_SAPI> sapiTTSFactory("Microsoft SAPI", false);
193 int * PTextToSpeech_SAPI::refCount;
194 PMutex PTextToSpeech_SAPI::refMutex;
197 #define new PNEW
200 PTextToSpeech_SAPI::PTextToSpeech_SAPI()
202 PWaitAndSignal m(refMutex);
204 if (refCount == NULL) {
205 refCount = new int;
206 *refCount = 1;
207 ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
208 } else {
209 (*refCount)++;
212 usingFile = opened = FALSE;
216 PTextToSpeech_SAPI::~PTextToSpeech_SAPI()
218 PWaitAndSignal m(refMutex);
220 if ((--(*refCount)) == 0) {
221 ::CoUninitialize();
222 delete refCount;
223 refCount = NULL;
227 BOOL PTextToSpeech_SAPI::OpenVoice()
229 PWaitAndSignal m(mutex);
231 HRESULT hr = m_cpVoice.CoCreateInstance(CLSID_SpVoice);
232 return (opened = SUCCEEDED(hr));
235 BOOL PTextToSpeech_SAPI::OpenChannel(PChannel *)
237 PWaitAndSignal m(mutex);
239 Close();
240 usingFile = FALSE;
241 return (opened = FALSE);
245 BOOL PTextToSpeech_SAPI::OpenFile(const PFilePath & fn)
247 PWaitAndSignal m(mutex);
249 Close();
250 usingFile = TRUE;
252 if (!OpenVoice())
253 return FALSE;
255 CSpStreamFormat wavFormat;
256 wavFormat.AssignFormat(SPSF_8kHz16BitMono);
258 WCHAR szwWavFileName[MAX_FN_SIZE] = L"";;
260 USES_CONVERSION;
261 wcscpy(szwWavFileName, T2W((const char *)fn));
262 HRESULT hr = SPBindToFile(szwWavFileName, SPFM_CREATE_ALWAYS, &cpWavStream, &wavFormat.FormatId(), wavFormat.WaveFormatExPtr());
264 if (!SUCCEEDED(hr)) {
265 cpWavStream.Release();
266 return FALSE;
269 hr = m_cpVoice->SetOutput(cpWavStream, TRUE);
271 return (opened = SUCCEEDED(hr));
274 BOOL PTextToSpeech_SAPI::Close()
276 PWaitAndSignal m(mutex);
278 if (!opened)
279 return TRUE;
281 if (usingFile) {
282 if (opened)
283 m_cpVoice->WaitUntilDone(INFINITE);
284 cpWavStream.Release();
287 if (opened)
288 m_cpVoice.Release();
290 opened = FALSE;
292 return TRUE;
296 BOOL PTextToSpeech_SAPI::Speak(const PString & otext, TextType hint)
298 PWaitAndSignal m(mutex);
300 if (!IsOpen())
301 return FALSE;
303 PString text = otext;
305 // do various things to the string, depending upon the hint
306 switch (hint) {
307 case Digits:
310 break;
312 default:
316 // quick hack to calculate length of Unicode string
317 WCHAR * uStr = new WCHAR[text.GetLength()+1];
319 USES_CONVERSION;
320 wcscpy(uStr, T2W((const char *)text));
322 HRESULT hr = m_cpVoice->Speak(uStr, SPF_DEFAULT, NULL);
324 delete[] uStr;
326 return SUCCEEDED(hr);
329 PStringArray PTextToSpeech_SAPI::GetVoiceList()
331 PWaitAndSignal m(mutex);
333 PStringArray voiceList;
335 CComPtr<ISpObjectToken> cpVoiceToken;
336 CComPtr<IEnumSpObjectTokens> cpEnum;
337 ULONG ulCount = 0;
339 //Enumerate the available voices
340 HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
342 // Get the number of voices
343 if (SUCCEEDED(hr))
344 hr = cpEnum->GetCount(&ulCount);
346 // Obtain a list of available voice tokens, set the voice to the token, and call Speak
347 while (SUCCEEDED(hr) && ulCount--) {
349 cpVoiceToken.Release();
351 if (SUCCEEDED(hr))
352 hr = cpEnum->Next(1, &cpVoiceToken, NULL );
354 if (SUCCEEDED(hr)) {
355 voiceList.AppendString("voice");
359 return voiceList;
362 BOOL PTextToSpeech_SAPI::SetVoice(const PString & v)
364 PWaitAndSignal m(mutex);
365 voice = v;
366 return TRUE;
369 BOOL PTextToSpeech_SAPI::SetRate(unsigned v)
371 rate = v;
372 return TRUE;
375 unsigned PTextToSpeech_SAPI::GetRate()
377 return rate;
380 BOOL PTextToSpeech_SAPI::SetVolume(unsigned v)
382 volume = v;
383 return TRUE;
386 unsigned PTextToSpeech_SAPI::GetVolume()
388 return volume;
391 #endif
392 // P_SAPI
394 ////////////////////////////////////////////////////////////
396 // Generic text to speech using Festival
399 #undef new
401 class PTextToSpeech_Festival : public PTextToSpeech
403 PCLASSINFO(PTextToSpeech_Festival, PTextToSpeech);
404 public:
405 PTextToSpeech_Festival();
406 ~PTextToSpeech_Festival();
408 // overrides
409 PStringArray GetVoiceList();
410 BOOL SetVoice(const PString & voice);
412 BOOL SetRate(unsigned rate);
413 unsigned GetRate();
415 BOOL SetVolume(unsigned volume);
416 unsigned GetVolume();
418 BOOL OpenFile (const PFilePath & fn);
419 BOOL OpenChannel(PChannel * channel);
420 BOOL IsOpen() { return opened; }
422 BOOL Close ();
423 BOOL Speak (const PString & str, TextType hint);
425 protected:
426 BOOL Invoke(const PString & str, const PFilePath & fn);
428 PMutex mutex;
429 BOOL opened;
430 BOOL usingFile;
431 PString text;
432 PFilePath path;
433 unsigned volume, rate;
434 PString voice;
437 #define new PNEW
439 PFactory<PTextToSpeech>::Worker<PTextToSpeech_Festival> festivalTTSFactory("Festival", false);
441 PTextToSpeech_Festival::PTextToSpeech_Festival()
443 PWaitAndSignal m(mutex);
444 usingFile = opened = FALSE;
445 rate = 8000;
446 volume = 100;
450 PTextToSpeech_Festival::~PTextToSpeech_Festival()
452 PWaitAndSignal m(mutex);
455 BOOL PTextToSpeech_Festival::OpenChannel(PChannel *)
457 PWaitAndSignal m(mutex);
459 Close();
460 usingFile = FALSE;
461 opened = FALSE;
463 return TRUE;
467 BOOL PTextToSpeech_Festival::OpenFile(const PFilePath & fn)
469 PWaitAndSignal m(mutex);
471 Close();
472 usingFile = TRUE;
473 path = fn;
474 opened = TRUE;
476 PTRACE(3, "TTS\tWriting speech to " << fn);
478 return TRUE;
481 BOOL PTextToSpeech_Festival::Close()
483 PWaitAndSignal m(mutex);
485 if (!opened)
486 return TRUE;
488 BOOL stat = FALSE;
490 if (usingFile)
491 stat = Invoke(text, path);
493 text = PString();
495 opened = FALSE;
497 return stat;
501 BOOL PTextToSpeech_Festival::Speak(const PString & ostr, TextType hint)
503 PWaitAndSignal m(mutex);
505 if (!IsOpen()) {
506 PTRACE(2, "TTS\tAttempt to speak whilst engine not open");
507 return FALSE;
510 PString str = ostr;
512 // do various things to the string, depending upon the hint
513 switch (hint) {
514 case Digits:
515 default:
519 if (usingFile) {
520 PTRACE(3, "TTS\tSpeaking " << ostr);
521 text = text & str;
522 return TRUE;
525 PTRACE(1, "TTS\tStream mode not supported for Festival");
527 return FALSE;
530 PStringArray PTextToSpeech_Festival::GetVoiceList()
532 PWaitAndSignal m(mutex);
534 PStringArray voiceList;
536 voiceList.AppendString("default");
538 return voiceList;
541 BOOL PTextToSpeech_Festival::SetVoice(const PString & v)
543 PWaitAndSignal m(mutex);
544 voice = v;
545 return TRUE;
548 BOOL PTextToSpeech_Festival::SetRate(unsigned v)
550 rate = v;
551 return TRUE;
554 unsigned PTextToSpeech_Festival::GetRate()
556 return rate;
559 BOOL PTextToSpeech_Festival::SetVolume(unsigned v)
561 volume = v;
562 return TRUE;
565 unsigned PTextToSpeech_Festival::GetVolume()
567 return volume;
570 BOOL PTextToSpeech_Festival::Invoke(const PString & otext, const PFilePath & fname)
572 PString text = otext;
573 text.Replace('\n', ' ', TRUE);
574 text.Replace('\"', '\'', TRUE);
575 text.Replace('\\', ' ', TRUE);
576 text = "\"" + text + "\"";
578 PString cmdLine = "echo " + text + " | ./text2wave -F " + PString(PString::Unsigned, rate) + " -otype riff > " + fname;
580 #if 1
582 #ifndef _WIN32_WCE
583 system(cmdLine);
584 #endif
586 return TRUE;
588 #else
590 PPipeChannel cmd;
591 int code = -1;
592 if (!cmd.Open(cmdLine, PPipeChannel::ReadWriteStd)) {
593 PTRACE(1, "TTS\tCannot execute command " << cmd);
594 } else {
595 PTRACE(3, "TTS\tCreating " << fname << " using " << cmdLine);
596 cmd.Execute();
597 code = cmd.WaitForTermination();
598 if (code >= 0) {
599 PTRACE(4, "TTS\tdata generated");
600 } else {
601 PTRACE(1, "TTS\tgeneration failed");
605 return code == 0;
607 #endif
610 // End Of File ///////////////////////////////////////////////////////////////