1 /***************************************************************************
2 * Copyright (C) 2005 by Olivier Goffart *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
20 #include "voicesignature.h"
23 #include <kconfiggroup.h>
29 #define PI (2.0 * asin(1.0))
33 #include <QtCore/QDate>
41 inline static float ABS(float X
)
43 return (X
>0) ? X
: -X
;
45 inline static int MAX(int X
, int Y
)
47 return (X
>Y
) ? X
: Y
;
49 inline static int MIN(int X
, int Y
)
51 return (X
<Y
) ? X
: Y
;
63 Complex (double re
): _re(re
), _im(0.0) {}
64 Complex (double re
, double im
): _re(re
), _im(im
) {}
65 double Re () const { return _re
; }
66 double Im () const { return _im
; }
67 void operator += (const Complex
& c
)
72 void operator -= (const Complex
& c
)
77 void operator *= (const Complex
& c
)
79 double reT
= c
._re
* _re
- c
._im
* _im
;
80 _im
= c
._re
* _im
+ c
._im
* _re
;
85 return Complex (-_re
, -_im
);
87 Complex
operator- (const Complex
& c
) const
89 return Complex (_re
- c
._re
, _im
- c
._im
);
91 Complex
operator+ (const Complex
& c
) const
93 return Complex (_re
+ c
._re
, _im
+ c
._im
);
95 Complex
operator* (const Complex
& c
) const
97 return Complex (_re
* c
._re
- _im
* c
._im
, _im
* c
._re
+ _re
* c
._im
);
99 double Mod () const { return sqrt (_re
* _re
+ _im
* _im
); }
101 static Complex
fromExp(double mod
, double arg
) { return Complex(mod
*cos(arg
) , mod
*sin(arg
)); }
107 static inline double hamming(uint n
, uint size
)
109 return HAMMING
? 0.54-0.46*cos( 2*PI
*n
/(size
-1) ) : 1;
113 static QVector
<double> fft(const Sound
& sound
, unsigned int start
, unsigned int stop
)
115 if(start
>=stop
|| sound
.size()==0)
116 return QVector
<double>();
118 //We need a sample with a size of a power of two
119 uint size
=stop
-start
;
120 unsigned short log2size
=0;
121 while( (1<<log2size
) < size
)
124 int diff
=(1<<log2size
) - size
;
125 if(diff
> size
/4 || 1<<log2size
> sound
.size() )
128 diff
=(1<<log2size
) - size
;
131 int start2
=start
-diff
/2;
132 int stop2
=start2
+ size
;
138 if(stop2
>sound
.size())
140 start2
-= stop2
- sound
.size();
149 //Generate an array to work in
150 QVector
<Complex
> samples(size
);
152 //Fill it with samples in the "reversed carry" order
154 for (uint f
= 0; f
< size
- 1; f
++)
156 samples
[f
]=sound
.at(start2
+rev_carry
)* hamming(rev_carry
, size
);
157 // KDEBUG(rev_carry);
158 int mask
= size
>>1; // N / 2
160 while (rev_carry
>= mask
)
162 rev_carry
-= mask
; // turn off this bit
167 samples
[size
-1]=sound
.at(start2
+size
-1)*hamming(size
-1, size
);
170 for(uint level
=0; level
< log2size
; level
++)
172 for( int k
=0; k
< (size
>>1) ; k
++)
174 uint indice1
= (k
<< (level
+1) ) % (size
-1); // (k*2*2^l)%(N-1)
175 uint indice2
= indice1
+ (1<<level
); // (k*2*2^l)%(N-1) + 2^l
177 uint coefW
= ( k
<< (level
+1) ) / (size
-1); // (k*2*2^l) div (N-1)
178 double Wexpn
=-2 * PI
* coefW
/ (2 << level
); // -2 pi n / 2^(l+1)
179 Complex W
=Complex::fromExp(1, Wexpn
) ;
182 //OPERATION BUTTERFLY
183 Complex a
=samples
[indice1
];
184 Complex b
=samples
[indice2
];
185 samples
[indice1
]=a
+W
*b
;
186 samples
[indice2
]=a
-W
*b
;
188 // kDebug() << "PAPILLON s_" << indice1 << " s_" << indice2 << " W_" << (2<<level) << "^" << coefW;
192 QVector
<double> result(size
);
193 for(uint f
=0;f
<size
;f
++)
195 result
[f
]=samples
[f
].Mod() / size
;
204 QVector
<double> VoiceSignature::fft(const Sound
& sound
, unsigned int start
, unsigned int stop
)
206 return KHotKeys::fft(sound
, start
, stop
);
207 /*QVector<double> result(8000);
208 for(int f=0; f<8000;f++)
212 for(uint x=start; x<stop; x++)
214 Complex s(sound.at(x));
215 double angle=-2*PI*f*x/8000;
216 s*= Complex( cos(angle) , sin(angle) );
219 result[f]= c.Mod()/(stop-start) ;
224 bool VoiceSignature::window(const Sound
& sound
, unsigned int *_start
, unsigned int *_stop
)
227 unsigned int length
=sound
.size();
228 uint unit
=WINDOW_UNIT
;
233 unsigned int start
=0 , stop
=0;
235 for(uint x
=0;x
<unit
;x
++)
237 moy
+=ABS(sound
.at(x
));
240 if(moy
>WINDOW_MINIMUM
*unit
)
243 for(uint x
=unit
; x
<length
; x
++)
245 if(moy
<WINDOW_MINIMUM
*unit
)
252 moy
+=ABS(sound
.at(x
));
253 moy
-=ABS(sound
.at(x
-unit
));
257 if(moy
>WINDOW_MINIMUM
*unit
&& isNoise
)
260 stop
=MIN(length
,stop
+WINDOW_MINIMUM_ECART
);
261 start
=MAX(0 ,start
-WINDOW_MINIMUM_ECART
);
270 //finally doesn't give better results
271 /*#define HZ_TO_MEL(F) (1127*log(1+(F)/700.0))
272 #define MEL_TO_HZ(M) ( ( exp((M)/1127.0) -1) *700 )*/
273 #define HZ_TO_MEL(F) (F)
274 #define MEL_TO_HZ(F) (F)
277 VoiceSignature::VoiceSignature(const Sound
& sound
)
279 static uint temp_wind
=0, temp_fft
=0, temp_moy
=0;
283 unsigned int start
, stop
;
284 if(!window(sound
,&start
,&stop
))
286 kWarning() << "No voice found in the sound" ;
290 temp_wind
+=t
.restart();
292 uint length
=stop
-start
;
294 for(int wind
=0; wind
<WINDOW_NUMBER
; wind
++)
296 unsigned int w_start
=MAX(start
, start
+ (int)((wind
- WINDOW_SUPER
)*length
/WINDOW_NUMBER
));
297 unsigned int w_stop
=MIN(stop
, start
+ (int)((wind
+1.0+WINDOW_SUPER
)*length
/WINDOW_NUMBER
));
300 QVector
<double> fourrier
=fft(sound
, w_start
,w_stop
);
302 temp_fft
+=t
.restart();
305 double mel_start
=HZ_TO_MEL(FFT_RANGE_INF
);
306 uint mel_stop
=HZ_TO_MEL(FFT_RANGE_SUP
);
308 for(int four
=0; four
<FOUR_NUMBER
; four
++)
310 unsigned int wf_start
=mel_start
+ four
*(mel_stop
-mel_start
)/FOUR_NUMBER
;
311 unsigned int wf_stop
=mel_start
+ (four
+1)*(mel_stop
-mel_start
)/FOUR_NUMBER
;
313 unsigned int f_start
=MEL_TO_HZ( wf_start
)*fourrier
.size()/sound
.fs();
314 unsigned int f_stop
=MEL_TO_HZ( wf_stop
)*fourrier
.size()/sound
.fs();
315 unsigned int f_size
=f_stop
-f_start
;
318 for(uint f
=f_start
; f
<f_stop
; f
++)
320 int freq
=f
*fourrier
.size()/sound
.fs();
321 nb
+=fourrier
[f
]*FFT_PONDERATION(freq
);
327 temp_moy
+=t
.restart();
331 // kDebug() << "wind: "<< temp_wind << " - fft: " << temp_fft << " - moy: " << temp_moy;
336 VoiceSignature::~VoiceSignature()
342 float VoiceSignature::diff(const VoiceSignature
&s1
, const VoiceSignature
&s2
)
344 if(s1
.isNull() || s2
.isNull())
348 for(int x
=0;x
<WINDOW_NUMBER
;x
++)
349 for(int y
=0;y
<FOUR_NUMBER
;y
++)
351 double d1
=s1
.data
[x
][y
]-s2
.data
[x
][y
];
352 result
+= d1
*d1
;//*pond[x][y];
358 // http://tcts.fpms.ac.be/cours/1005-08/speech/projects/2001/delfabro_henry_poitoux/
360 const int I
=WINDOW_NUMBER
;
361 const int J
=WINDOW_NUMBER
;
363 for(int f
=1;f
<=J
;f
++)
365 for(int f
=1;f
<=I
;f
++)
368 for(int i
=1;i
<=I
;i
++)
369 for(int j
=1;j
<=J
;j
++)
372 for(int f
=0;f
<FOUR_NUMBER
;f
++)
374 double d1
=s1
.data
[i
-1][f
]-s2
.data
[j
-1][f
];
375 d
+= d1
*d1
;//*pond[x][y];
378 g
[i
][j
]=qMin(qMin( g
[i
-1][j
]+d
, g
[i
][j
-1]+d
) , g
[i
-1][j
-1]+d
+d
);
381 return g
[I
][J
]/(I
+J
);
388 int VoiceSignature::size1()
390 return WINDOW_NUMBER
;
393 int VoiceSignature::size2()
398 QMap
<int, QMap
<int, double> > VoiceSignature::pond
;
402 void VoiceSignature::write(KConfigGroup
& cfg
, const QString
&key
) const
405 for(int x
=0;x
<WINDOW_NUMBER
;x
++)
406 for(int y
=0;y
<FOUR_NUMBER
;y
++)
408 sl
.append( QString::number(data
[x
][y
]) );
410 cfg
.writeEntry(key
,sl
);
413 void VoiceSignature::read(KConfigGroup
& cfg
, const QString
&key
)
415 QStringList sl
=cfg
.readEntry(key
, QStringList());
416 for(int x
=0;x
<WINDOW_NUMBER
;x
++)
417 for(int y
=0;y
<FOUR_NUMBER
;y
++)
419 data
[x
][y
]= sl
[x
*FOUR_NUMBER
+y
].toDouble();