not quite so much needs to be delayed to the init() function
[personal-kdebase.git] / workspace / khotkeys / libkhotkeysprivate / voicesignature.cpp
bloba0de8c6245509199038a02dc922ef6b013efeef1
1 /***************************************************************************
2 * Copyright (C) 2005 by Olivier Goffart *
3 * ogoffart@kde.org *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
20 #include "voicesignature.h"
21 #include "sound.h"
22 #include <kconfig.h>
23 #include <kconfiggroup.h>
25 #include <math.h>
26 #ifdef PI
27 #undef PI
28 #endif
29 #define PI (2.0 * asin(1.0))
32 #include <kdebug.h>
33 #include <QtCore/QDate>
35 #undef Complex
37 namespace KHotKeys
41 inline static float ABS(float X)
43 return (X>0) ? X : -X ;
45 inline static int MAX(int X , int Y)
47 return (X>Y) ? X : Y ;
49 inline static int MIN(int X , int Y)
51 return (X<Y) ? X : Y ;
59 class Complex
61 public:
62 Complex () {}
63 Complex (double re): _re(re), _im(0.0) {}
64 Complex (double re, double im): _re(re), _im(im) {}
65 double Re () const { return _re; }
66 double Im () const { return _im; }
67 void operator += (const Complex& c)
69 _re += c._re;
70 _im += c._im;
72 void operator -= (const Complex& c)
74 _re -= c._re;
75 _im -= c._im;
77 void operator *= (const Complex& c)
79 double reT = c._re * _re - c._im * _im;
80 _im = c._re * _im + c._im * _re;
81 _re = reT;
83 Complex operator- ()
85 return Complex (-_re, -_im);
87 Complex operator- (const Complex& c) const
89 return Complex (_re - c._re, _im - c._im);
91 Complex operator+ (const Complex& c) const
93 return Complex (_re + c._re, _im + c._im);
95 Complex operator* (const Complex& c) const
97 return Complex (_re * c._re - _im * c._im , _im * c._re + _re * c._im);
99 double Mod () const { return sqrt (_re * _re + _im * _im); }
101 static Complex fromExp(double mod, double arg) { return Complex(mod*cos(arg) , mod*sin(arg)); }
102 private:
103 double _re;
104 double _im;
107 static inline double hamming(uint n, uint size)
109 return HAMMING ? 0.54-0.46*cos( 2*PI*n /(size-1) ) : 1;
113 static QVector<double> fft(const Sound& sound, unsigned int start, unsigned int stop)
115 if(start>=stop || sound.size()==0)
116 return QVector<double>();
118 //We need a sample with a size of a power of two
119 uint size=stop-start;
120 unsigned short log2size=0;
121 while( (1<<log2size) < size )
122 log2size++;
124 int diff=(1<<log2size) - size;
125 if(diff > size/4 || 1<<log2size > sound.size() )
127 log2size--;
128 diff=(1<<log2size) - size;
130 size=1<<log2size;
131 int start2=start-diff/2;
132 int stop2=start2+ size;
133 if(start2<0)
135 stop2-=start2;
136 start2=0;
138 if(stop2>sound.size())
140 start2-= stop2 - sound.size();
141 stop2=sound.size();
142 if(start2<0)
144 stop2-=start2;
145 start2=0;
149 //Generate an array to work in
150 QVector<Complex> samples(size);
152 //Fill it with samples in the "reversed carry" order
153 int rev_carry = 0;
154 for (uint f = 0; f < size - 1; f++)
156 samples[f]=sound.at(start2+rev_carry)* hamming(rev_carry, size);
157 // KDEBUG(rev_carry);
158 int mask = size>>1; // N / 2
159 // add 1 backwards
160 while (rev_carry >= mask)
162 rev_carry -= mask; // turn off this bit
163 mask >>= 1;
165 rev_carry += mask;
167 samples[size-1]=sound.at(start2+size-1)*hamming(size-1, size);
169 //FFT
170 for(uint level=0; level < log2size; level++)
172 for( int k=0; k< (size>>1) ; k++)
174 uint indice1 = (k << (level+1) ) % (size-1); // (k*2*2^l)%(N-1)
175 uint indice2 = indice1 + (1<<level); // (k*2*2^l)%(N-1) + 2^l
177 uint coefW = ( k << (level+1) ) / (size-1); // (k*2*2^l) div (N-1)
178 double Wexpn=-2 * PI * coefW / (2 << level); // -2 pi n / 2^(l+1)
179 Complex W=Complex::fromExp(1, Wexpn) ;
182 //OPERATION BUTTERFLY
183 Complex a=samples[indice1];
184 Complex b=samples[indice2];
185 samples[indice1]=a+W*b;
186 samples[indice2]=a-W*b;
188 // kDebug() << "PAPILLON s_" << indice1 << " s_" << indice2 << " W_" << (2<<level) << "^" << coefW;
192 QVector<double> result(size);
193 for(uint f=0;f<size;f++)
195 result[f]=samples[f].Mod() / size;
198 return result;
204 QVector<double> VoiceSignature::fft(const Sound& sound, unsigned int start, unsigned int stop)
206 return KHotKeys::fft(sound, start, stop);
207 /*QVector<double> result(8000);
208 for(int f=0; f<8000;f++)
210 Complex c(0);
212 for(uint x=start; x<stop; x++)
214 Complex s(sound.at(x));
215 double angle=-2*PI*f*x/8000;
216 s*= Complex( cos(angle) , sin(angle) );
217 c+=s;
219 result[f]= c.Mod()/(stop-start) ;
221 return result;*/
224 bool VoiceSignature::window(const Sound& sound, unsigned int *_start, unsigned int *_stop)
226 bool isNoise=false;
227 unsigned int length=sound.size();
228 uint unit=WINDOW_UNIT;
229 if(length < unit )
230 return false;
232 //FenĂȘtrage
233 unsigned int start=0 , stop=0;
234 double moy=0;
235 for(uint x=0;x<unit;x++)
237 moy+=ABS(sound.at(x));
240 if(moy>WINDOW_MINIMUM*unit)
241 isNoise=true;
243 for(uint x=unit; x<length; x++)
245 if(moy<WINDOW_MINIMUM*unit)
247 if(stop==0)
248 start=x-unit/2;
250 else
251 stop=x-unit/2;
252 moy+=ABS(sound.at(x));
253 moy-=ABS(sound.at(x-unit));
257 if(moy>WINDOW_MINIMUM*unit && isNoise)
258 return false;
260 stop=MIN(length,stop+WINDOW_MINIMUM_ECART);
261 start=MAX(0 ,start-WINDOW_MINIMUM_ECART);
263 if(_start)
264 *_start=start;
265 if(_stop)
266 *_stop=stop;
267 return start<stop;
270 //finally doesn't give better results
271 /*#define HZ_TO_MEL(F) (1127*log(1+(F)/700.0))
272 #define MEL_TO_HZ(M) ( ( exp((M)/1127.0) -1) *700 )*/
273 #define HZ_TO_MEL(F) (F)
274 #define MEL_TO_HZ(F) (F)
277 VoiceSignature::VoiceSignature(const Sound& sound)
279 static uint temp_wind=0, temp_fft=0, temp_moy=0;
280 QTime t;
281 t.start();
283 unsigned int start , stop;
284 if(!window(sound,&start,&stop))
286 kWarning() << "No voice found in the sound" ;
287 return;
290 temp_wind+=t.restart();
292 uint length=stop-start;
294 for(int wind=0; wind<WINDOW_NUMBER; wind++)
296 unsigned int w_start=MAX(start, start+ (int)((wind - WINDOW_SUPER)*length/WINDOW_NUMBER));
297 unsigned int w_stop =MIN(stop , start+ (int)((wind+1.0+WINDOW_SUPER)*length/WINDOW_NUMBER));
300 QVector<double> fourrier=fft(sound, w_start,w_stop);
302 temp_fft+=t.restart();
304 //MEL conversion
305 double mel_start=HZ_TO_MEL(FFT_RANGE_INF);
306 uint mel_stop=HZ_TO_MEL(FFT_RANGE_SUP);
308 for(int four=0; four<FOUR_NUMBER; four++)
310 unsigned int wf_start=mel_start + four*(mel_stop-mel_start)/FOUR_NUMBER;
311 unsigned int wf_stop=mel_start + (four+1)*(mel_stop-mel_start)/FOUR_NUMBER;
313 unsigned int f_start=MEL_TO_HZ( wf_start )*fourrier.size()/sound.fs();
314 unsigned int f_stop=MEL_TO_HZ( wf_stop )*fourrier.size()/sound.fs();
315 unsigned int f_size=f_stop-f_start;
317 double nb=0;
318 for(uint f=f_start; f<f_stop; f++)
320 int freq=f*fourrier.size()/sound.fs();
321 nb+=fourrier[f]*FFT_PONDERATION(freq);
323 nb/=(f_size);
324 data[wind][four]=nb;
327 temp_moy+=t.restart();
331 // kDebug() << "wind: "<< temp_wind << " - fft: " << temp_fft << " - moy: " << temp_moy;
336 VoiceSignature::~VoiceSignature()
342 float VoiceSignature::diff(const VoiceSignature &s1, const VoiceSignature &s2)
344 if(s1.isNull() || s2.isNull())
345 return 1000000;
346 #if 0
347 double result=0;
348 for(int x=0;x<WINDOW_NUMBER;x++)
349 for(int y=0;y<FOUR_NUMBER;y++)
351 double d1=s1.data[x][y]-s2.data[x][y];
352 result+= d1*d1;//*pond[x][y];
354 return result;
355 #endif
357 //DTW
358 // http://tcts.fpms.ac.be/cours/1005-08/speech/projects/2001/delfabro_henry_poitoux/
360 const int I=WINDOW_NUMBER;
361 const int J=WINDOW_NUMBER;
362 double g[I+1][J+1];
363 for(int f=1;f<=J;f++)
364 g[0][f]=10000000;
365 for(int f=1;f<=I;f++)
366 g[f][0]=10000000;
367 g[0][0]=0;
368 for(int i=1;i<=I;i++)
369 for(int j=1;j<=J;j++)
371 double d=0;
372 for(int f=0;f<FOUR_NUMBER;f++)
374 double d1=s1.data[i-1][f]-s2.data[j-1][f];
375 d+= d1*d1;//*pond[x][y];
377 d=sqrt(d);
378 g[i][j]=qMin(qMin( g[i-1][j]+d, g[i][j-1]+d ) , g[i-1][j-1]+d+d );
381 return g[I][J]/(I+J);
388 int VoiceSignature::size1()
390 return WINDOW_NUMBER;
393 int VoiceSignature::size2()
395 return FOUR_NUMBER;
398 QMap<int, QMap<int, double> > VoiceSignature::pond;
402 void VoiceSignature::write(KConfigGroup& cfg, const QString &key) const
404 QStringList sl;
405 for(int x=0;x<WINDOW_NUMBER;x++)
406 for(int y=0;y<FOUR_NUMBER;y++)
408 sl.append( QString::number(data[x][y]) );
410 cfg.writeEntry(key,sl);
413 void VoiceSignature::read(KConfigGroup& cfg, const QString &key)
415 QStringList sl=cfg.readEntry(key, QStringList());
416 for(int x=0;x<WINDOW_NUMBER;x++)
417 for(int y=0;y<FOUR_NUMBER;y++)
419 data[x][y]= sl[x*FOUR_NUMBER+y].toDouble();