Merge pull request #110 from tesselode/fixes
[wdl/wdl-ol.git] / WDL / convoengine.cpp
blob82856b686143068304e07ec69b89785a8209f5b0
/*
  WDL - convoengine.cpp
  Copyright (C) 2006 and later Cockos Incorporated

  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
24 #ifdef _WIN32
25 #include <windows.h>
26 #endif
27 #include <math.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <memory.h>
31 #include "convoengine.h"
33 #include "denormal.h"
35 //#define TIMING
36 #include "timing.c"
38 #define CONVOENGINE_SILENCE_THRESH 1.0e-12 // -240dB
39 #define CONVOENGINE_IMPULSE_SILENCE_THRESH 1.0e-15 // -300dB
41 static void WDL_CONVO_CplxMul2(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
43 WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
44 if (n<2 || (n&1)) return;
46 do {
47 t1 = a[0].re * b[0].re;
48 t2 = a[0].im * b[0].im;
49 t3 = a[0].im * b[0].re;
50 t4 = a[0].re * b[0].im;
51 t5 = a[1].re * b[1].re;
52 t6 = a[1].im * b[1].im;
53 t7 = a[1].im * b[1].re;
54 t8 = a[1].re * b[1].im;
55 t1 -= t2;
56 t3 += t4;
57 t5 -= t6;
58 t7 += t8;
59 c[0].re = t1;
60 c[1].re = t5;
61 c[0].im = t3;
62 c[1].im = t7;
63 a += 2;
64 b += 2;
65 c += 2;
66 } while (n -= 2);
68 static void WDL_CONVO_CplxMul3(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
70 WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
71 if (n<2 || (n&1)) return;
73 do {
74 t1 = a[0].re * b[0].re;
75 t2 = a[0].im * b[0].im;
76 t3 = a[0].im * b[0].re;
77 t4 = a[0].re * b[0].im;
78 t5 = a[1].re * b[1].re;
79 t6 = a[1].im * b[1].im;
80 t7 = a[1].im * b[1].re;
81 t8 = a[1].re * b[1].im;
82 t1 -= t2;
83 t3 += t4;
84 t5 -= t6;
85 t7 += t8;
86 c[0].re += t1;
87 c[1].re += t5;
88 c[0].im += t3;
89 c[1].im += t7;
90 a += 2;
91 b += 2;
92 c += 2;
93 } while (n -= 2);
96 static bool CompareQueueToBuf(WDL_FastQueue *q, const void *data, int len)
98 int offs=0;
99 while (len>0)
101 void *td=NULL;
102 int sz=q->GetPtr(offs,&td);
103 if (sz<1) return true; // not enough data = not equal!
104 if (sz>len) sz=len;
106 int i=sz/sizeof(WDL_FFT_REAL);
107 WDL_FFT_REAL *a1=(WDL_FFT_REAL*)td;
108 WDL_FFT_REAL *b1=(WDL_FFT_REAL*)data;
109 while (i--)
111 if (fabs(*a1-*b1)>CONVOENGINE_SILENCE_THRESH) return true;
112 a1++;
113 b1++;
116 data = ((char *)data)+sz;
117 offs+=sz;
118 len-=sz;
120 return false;
124 WDL_ConvolutionEngine::WDL_ConvolutionEngine()
126 WDL_fft_init();
127 m_impulse_nch=1;
128 m_fft_size=0;
129 m_impulse_len=0;
130 m_proc_nch=0;
133 WDL_ConvolutionEngine::~WDL_ConvolutionEngine()
// Loads an impulse response into the engine.
//   impulse               - source impulse buffer (per-channel sample arrays).
//   fft_size              - FFT size to use; <=0 selects one automatically (see below).
//   impulse_sample_offset - number of leading impulse samples to skip.
//   max_imp_size          - if nonzero, caps the number of impulse samples used per channel.
//   forceBrute            - if true, stores the impulse for direct (non-FFT) convolution.
// Returns 0 in brute mode, otherwise m_fft_size/2.
137 int WDL_ConvolutionEngine::SetImpulse(WDL_ImpulseBuffer *impulse, int fft_size, int impulse_sample_offset, int max_imp_size, bool forceBrute)
// impulse_len becomes the longest per-channel length after the offset and the optional cap.
139 int impulse_len=0;
140 int x;
141 int nch=impulse->GetNumChannels();
142 for (x = 0; x < nch; x ++)
144 int l=impulse->impulses[x].GetSize()-impulse_sample_offset;
145 if (max_imp_size && l>max_imp_size) l=max_imp_size;
146 if (impulse_len < l) impulse_len=l;
148 m_impulse_nch=nch;
150 if (m_impulse_nch>1) // detect mono signals pretending to be multichannel
// The scan stops at the first channel whose size or raw contents differ from channel 0;
// if it runs to completion every channel is identical and the impulse is treated as mono.
152 for (x = 1; x < m_impulse_nch; x ++)
154 if (impulse->impulses[x].GetSize()!=impulse->impulses[0].GetSize()||
155 memcmp(impulse->impulses[x].Get(),impulse->impulses[0].Get(),
156 impulse->impulses[0].GetSize()*sizeof(WDL_FFT_REAL)))
157 break;
159 if (x >= m_impulse_nch) m_impulse_nch=1;
162 m_impulse_len=impulse_len;
// -1 never equals a caller-supplied channel count, so the FFT path of Add() rebuilds its history buffers on the next call.
163 m_proc_nch=-1;
// Brute mode: m_fft_size of 0 makes Add()/Avail() take their direct-convolution paths.
166 if (forceBrute)
168 m_fft_size=0;
170 // save impulse
171 for (x = 0; x < m_impulse_nch; x ++)
173 WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
174 int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
175 if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
// impout starts one past the end and is pre-decremented, so the impulse is stored in reverse order.
177 WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize(lenout)+lenout;
178 while (lenout-->0) *--impout = (WDL_CONVO_IMPULSEBUFf) *imp++;
181 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
183 m_samplesin[x].Clear();
184 m_samplesin2[x].Clear();
185 m_samplesout[x].Clear();
188 return 0;
// Automatic FFT size: start at 32 and double until it reaches twice the impulse length or the limit msz
// (msz is -fft_size*2 when fft_size<=-16, otherwise 32768).
192 if (fft_size<=0)
194 int msz=fft_size<=-16? -fft_size*2 : 32768;
196 fft_size=32;
197 while (fft_size < impulse_len*2 && fft_size < msz) fft_size*=2;
200 m_fft_size=fft_size;
// The impulse is split into blocks of fft_size/2 samples; nblocks is the rounded-up block count.
202 int impchunksize=fft_size/2;
203 int nblocks=(impulse_len+impchunksize-1)/impchunksize;
204 //char buf[512];
205 //sprintf(buf,"il=%d, ffts=%d, cs=%d, nb=%d\n",impulse_len,fft_size,impchunksize,nblocks);
206 //OutputDebugString(buf);
// smallerSizeMode: the stored impulse element type has a different size than WDL_FFT_REAL.
208 const bool smallerSizeMode=sizeof(WDL_CONVO_IMPULSEBUFf)!=sizeof(WDL_FFT_REAL);
// The 1/fft_size normalisation is applied to the impulse here, before its forward FFT.
210 WDL_FFT_REAL scale=(WDL_FFT_REAL) (1.0/fft_size);
211 for (x = 0; x < m_impulse_nch; x ++)
213 WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
// imp2 is the following channel's impulse (NULL for the last channel); it is packed into the odd slots below.
// NOTE(review): imp2 is read for as many samples as channel x provides, so this assumes channel x+1 is at least as long - confirm.
215 WDL_FFT_REAL *imp2=x < m_impulse_nch-1 ? impulse->impulses[x+1].Get()+impulse_sample_offset : NULL;
// One extra block of storage is requested in smallerSizeMode; presumably this gives the in-place
// WDL_FFT_REAL-width FFT below enough room before the values are narrowed - TODO confirm.
217 WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize((nblocks+!!smallerSizeMode)*fft_size*2);
218 char *zbuf=m_impulse_zflag[x].Resize(nblocks);
219 int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
220 if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
222 int bl;
223 for (bl = 0; bl < nblocks; bl ++)
// thissz = samples of this channel that fall into the current block (at most impchunksize).
226 int thissz=lenout;
227 if (thissz > impchunksize) thissz=impchunksize;
229 lenout -= thissz;
230 int i=0;
// mv / mv2 track the peak absolute value of this channel and of the following channel within the block.
231 WDL_FFT_REAL mv=0.0;
232 WDL_FFT_REAL mv2=0.0;
233 WDL_FFT_REAL *imptmp = (WDL_FFT_REAL *)impout; //-V615
// Interleave: this channel goes to even slots, the following channel (or zero) to odd slots.
235 for (; i < thissz; i ++)
237 WDL_FFT_REAL v=*imp++;
238 WDL_FFT_REAL v2=(WDL_FFT_REAL)fabs(v);
239 if (v2 > mv) mv=v2;
241 imptmp[i*2]=denormal_filter_aggressive(v * scale);
243 if (imp2)
245 v=*imp2++;
246 v2=(WDL_FFT_REAL)fabs(v);
247 if (v2>mv2) mv2=v2;
248 imptmp[i*2+1]=denormal_filter_aggressive(v*scale);
250 else imptmp[i*2+1]=0.0;
// Zero-fill the remainder of the block up to fft_size interleaved pairs.
252 for (; i < fft_size; i ++)
254 imptmp[i*2]=0.0;
255 imptmp[i*2+1]=0.0;
// Only blocks with content above the silence threshold are transformed; the per-block flag is
// 2 when this channel has content, 1 when only the following channel does, 0 when the block is silent.
257 if (mv>CONVOENGINE_IMPULSE_SILENCE_THRESH||mv2>CONVOENGINE_IMPULSE_SILENCE_THRESH)
259 *zbuf++=mv>CONVOENGINE_IMPULSE_SILENCE_THRESH ? 2 : 1; // 1 means only second channel has content
260 WDL_fft((WDL_FFT_COMPLEX*)impout,fft_size,0);
// In smallerSizeMode the transformed values are converted in place to the storage type.
262 if (smallerSizeMode)
264 int x,n=fft_size*2;
265 for(x=0;x<n;x++) impout[x]=(WDL_CONVO_IMPULSEBUFf)imptmp[x];
268 else *zbuf++=0;
270 impout+=fft_size*2;
273 return m_fft_size/2;
277 void WDL_ConvolutionEngine::Reset() // clears out any latent samples
279 int x;
280 memset(m_hist_pos,0,sizeof(m_hist_pos));
281 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
283 m_samplesin[x].Clear();
284 m_samplesin2[x].Clear();
285 m_samplesout[x].Clear();
286 memset(m_samplehist_zflag[x].Get(),0,m_samplehist_zflag[x].GetSize());
287 memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
288 memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
// Feeds len samples for each of nch channels into the engine.
//   bufs - per-channel input pointers; bufs itself or an individual entry may be NULL, which is treated as silence.
// With no FFT size set (m_fft_size<1) the convolution is computed directly here and written to m_samplesout.
// Otherwise the input is only queued (or passed straight through when there is no impulse).
292 void WDL_ConvolutionEngine::Add(WDL_FFT_REAL **bufs, int len, int nch)
// ---- direct (brute force) convolution path ----
294 if (m_fft_size<1)
296 int ch;
297 m_proc_nch=nch;
298 for (ch = 0; ch < nch; ch ++)
// Channels beyond the impulse channel count are mapped back by subtracting m_impulse_nch once.
// NOTE(review): this only lands in range while nch <= 2*m_impulse_nch - confirm callers never exceed that.
300 int wch=ch;
301 if (wch >=m_impulse_nch) wch-=m_impulse_nch;
302 WDL_CONVO_IMPULSEBUFf *imp=m_impulse[wch].Get();
303 int imp_len = m_impulse[wch].GetSize();
306 if (imp_len>0)
// Make sure m_samplesin2 holds at least imp_len samples of history, padding with zeros if needed,
// so the reads below that reach back before the newly added samples stay inside the queue.
308 if (m_samplesin2[ch].Available()<imp_len*(int)sizeof(WDL_FFT_REAL))
310 int sza=imp_len*sizeof(WDL_FFT_REAL)-m_samplesin2[ch].Available();
311 memset(m_samplesin2[ch].Add(NULL,sza),0,sza);
// psrc points at the start of the samples appended for this call.
313 WDL_FFT_REAL *psrc;
315 if (bufs && bufs[ch])
316 psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
317 else
319 psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
320 memset(psrc,0,len*sizeof(WDL_FFT_REAL));
323 WDL_FFT_REAL *pout=(WDL_FFT_REAL*)m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
324 int x;
// len1 is len rounded down to an even count; the main loop emits two output samples per pass.
325 int len1 = len&~1;
326 for (x=0; x < len1 ; x += 2)
328 int i=imp_len;
329 double sum=0.0,sum2=0.0;
// sp starts imp_len-1 samples before output position x; imp is walked forwards alongside it.
330 WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
331 WDL_CONVO_IMPULSEBUFf *ip=imp;
// j = number of groups of four impulse taps, i = the 0..3 leftover taps.
332 int j=i/4; i&=3;
333 while (j--) // produce 2 samples, 4 impulse samples at a time
335 double a = ip[0],b=ip[1],aa=ip[2],bb=ip[3];
336 double c = sp[1],d=sp[2],cc=sp[3];
337 sum+=a * sp[0] + b * c + aa * d + bb * cc;
338 sum2+=a * c + b * d + aa * cc + bb * sp[4];
339 ip+=4;
340 sp+=4;
343 while (i--)
345 double a = *ip++;
346 sum+=a * sp[0];
347 sum2+=a * sp[1];
348 sp++;
350 pout[x]=(WDL_FFT_REAL) sum;
351 pout[x+1]=(WDL_FFT_REAL) sum2;
353 for(;x<len;x++) // any odd samples left
355 int i=imp_len;
356 double sum=0.0;
357 WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
358 WDL_CONVO_IMPULSEBUFf *ip=imp;
359 int j=i/4; i&=3;
360 while (j--)
362 sum+=ip[0] * sp[0] + ip[1] * sp[1] + ip[2] * sp[2] + ip[3] * sp[3];
363 ip+=4;
364 sp+=4;
367 while (i--) sum+=*ip++ * *sp++;
368 pout[x]=(WDL_FFT_REAL) sum;
// Drop len samples from the front of the history queue; the queue therefore keeps the same amount of history.
370 m_samplesin2[ch].Advance(len*sizeof(WDL_FFT_REAL));
371 m_samplesin2[ch].Compact();
// No impulse stored for this channel: copy the input (or silence) straight to the output.
373 else
375 if (bufs && bufs[ch]) m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
376 else
378 memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
383 return;
// ---- FFT path ----
387 int impchunksize=m_fft_size/2;
388 int nblocks=(m_impulse_len+impchunksize-1)/impchunksize;
// Channel count changed (or SetImpulse() reset m_proc_nch to -1): rebuild the per-channel state.
390 if (m_proc_nch != nch)
392 m_proc_nch=nch;
393 memset(m_hist_pos,0,sizeof(m_hist_pos));
394 int x;
// mso is a running maximum of queued input+output bytes over the channels visited so far;
// channels that hold less than that get their output padded with zeros below.
395 int mso=0;
396 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
398 int so=m_samplesin[x].Available() + m_samplesout[x].Available();
399 if (so>mso) mso=so;
401 if (x>=nch)
403 m_samplesin[x].Clear();
404 m_samplesout[x].Clear();
406 else
// With no impulse, pending input is moved directly to the output queue.
408 if (m_impulse_len<1||!nblocks)
410 if (m_samplesin[x].Available())
412 int s=m_samplesin[x].Available();
413 void *buf=m_samplesout[x].Add(NULL,s);
414 m_samplesin[x].GetToBuf(0,buf,s);
415 m_samplesin[x].Clear();
419 if (so < mso)
421 memset(m_samplesout[x].Add(NULL,mso-so),0,mso-so);
// Size and clear the history buffers: active channels get nblocks*m_fft_size*2 history values
// and m_fft_size/2 overlap values; unused channels get empty sample/overlap buffers.
425 int sz=0;
426 if (x<nch) sz=nblocks*m_fft_size;
428 memset(m_samplehist_zflag[x].Resize(nblocks),0,nblocks);
429 m_samplehist[x].Resize(sz*2);
430 m_overlaphist[x].Resize(x<nch ? m_fft_size/2 : 0);
431 memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
432 memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
436 int ch;
437 if (m_impulse_len<1||!nblocks)
439 for (ch = 0; ch < nch; ch ++)
441 if (bufs && bufs[ch])
442 m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
443 else
444 memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
446 // pass through
447 return;
// Normal case: queue the input; the convolution itself is performed in Avail().
// Channels whose history or overlap buffer is empty are skipped.
450 for (ch = 0; ch < nch; ch ++)
452 if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
454 m_samplesin[ch].Add(bufs ? bufs[ch] : NULL,len*sizeof(WDL_FFT_REAL));
459 void WDL_ConvolutionEngine::AddSilenceToOutput(int len, int nch)
461 int x;
462 for(x=0;x<nch&&x<m_proc_nch;x++)
464 memset(m_samplesout[x].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
// Processes queued input until at least `want` output samples are queued per channel (or input runs out)
// and returns how many output samples are available.
// In brute mode (m_fft_size<1) the work was already done in Add(), so only the count for channel 0 is returned.
468 int WDL_ConvolutionEngine::Avail(int want)
470 if (m_fft_size<1)
472 return m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
// sz / chunksize: samples consumed and produced per block (half the FFT size).
475 const int sz=m_fft_size/2;
476 const int chunksize=m_fft_size/2;
477 const int nblocks=(m_impulse_len+chunksize-1)/chunksize;
478 // clear combining buffer
479 WDL_FFT_REAL *workbuf2 = m_combinebuf.Resize(m_fft_size*4); // temp space
481 int ch;
483 for (ch = 0; ch < m_proc_nch; ch ++)
485 if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
// srcc = impulse channel used for this processing channel, clamped to the last impulse channel.
486 int srcc=ch;
487 if (srcc>=m_impulse_nch) srcc=m_impulse_nch-1;
489 bool allow_mono_input_mode=true;
490 bool mono_impulse_mode=false;
// With a mono impulse and a following channel whose queues are in step with this one,
// both channels are packed into a single complex FFT (this channel in the even slots, the next in the odd slots).
492 if (m_impulse_nch==1 && ch<m_proc_nch-1 &&
493 m_samplehist[ch+1].GetSize()&&m_overlaphist[ch+1].GetSize() &&
494 m_samplesin[ch].Available()==m_samplesin[ch+1].Available() &&
495 m_samplesout[ch].Available()==m_samplesout[ch+1].Available()
497 { // 2x processing mode
498 mono_impulse_mode=true;
499 allow_mono_input_mode=false;
503 const int in_needed=sz;
505 // useSilentList[x] = 1 for mono signal, 2 for stereo, 0 for silent
506 char *useSilentList=m_samplehist_zflag[ch].GetSize()==nblocks ? m_samplehist_zflag[ch].Get() : NULL;
// One block per iteration, while a full block of input is queued and fewer than `want` output samples are queued.
507 while (m_samplesin[ch].Available()/(int)sizeof(WDL_FFT_REAL) >= sz &&
508 m_samplesout[ch].Available() < want*(int)sizeof(WDL_FFT_REAL))
// The history is a ring of nblocks slots; advance to the next slot, wrapping to 0.
510 int histpos;
511 if ((histpos=++m_hist_pos[ch]) >= nblocks) histpos=m_hist_pos[ch]=0;
513 // get samples from input, to history
514 WDL_FFT_REAL *optr = m_samplehist[ch].Get()+histpos*m_fft_size*2;
// The raw block is first copied to optr+sz and then unpacked below into interleaved slots starting at optr.
516 m_samplesin[ch].GetToBuf(0,optr+sz,in_needed*sizeof(WDL_FFT_REAL));
517 m_samplesin[ch].Advance(in_needed*sizeof(WDL_FFT_REAL));
520 bool mono_input_mode=false;
// nonzflag becomes true once any unpacked sample exceeds the silence threshold in magnitude.
522 bool nonzflag=false;
523 if (mono_impulse_mode)
// Pull the matching block of the next channel into workbuf2 and interleave it into the odd slots.
525 if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
526 m_samplesin[ch+1].GetToBuf(0,workbuf2,sz*sizeof(WDL_FFT_REAL));
527 m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
528 int i;
529 for (i = 0; i < sz; i ++) // unpack samples
531 WDL_FFT_REAL f = optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
532 if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
533 f=optr[i*2+1]=denormal_filter_aggressive(workbuf2[i]);
534 if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
537 else
// Mono-input candidate: the next channel's queued input matches this block within the silence threshold
// (CompareQueueToBuf() returns false) and the impulse has a further channel after srcc.
539 if (allow_mono_input_mode &&
540 ch < m_proc_nch-1 &&
541 srcc<m_impulse_nch-1 &&
542 !CompareQueueToBuf(&m_samplesin[ch+1],optr+sz,sz*sizeof(WDL_FFT_REAL))
545 mono_input_mode=true;
547 else
549 allow_mono_input_mode=false;
552 int i;
553 for (i = 0; i < sz; i ++) // unpack samples
555 WDL_FFT_REAL f=optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
556 optr[i*2+1]=0.0;
557 if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
// Mono-input mode is dropped if there is no flag list or any older history block was stored as stereo (flag 2).
561 int i;
562 for (i = 1; mono_input_mode && i < nblocks; i ++) // start @ 1, since hist[histpos] is no longer used for here
564 int srchistpos = histpos-i;
565 if (srchistpos < 0) srchistpos += nblocks;
566 if (!useSilentList || useSilentList[srchistpos]==2) mono_input_mode=false;
// Zero the upper half of the block (zero padding) unless the block is silent and will be skipped via its flag.
569 if (nonzflag||!useSilentList) memset(optr+sz*2,0,sz*2*sizeof(WDL_FFT_REAL));
572 #ifdef WDLCONVO_ZL_ACCOUNTING
573 m_zl_fftcnt++;
574 #endif
// Forward transform, skipped for silent blocks.
576 if (nonzflag) WDL_fft((WDL_FFT_COMPLEX*)optr,m_fft_size,0);
578 if (useSilentList) useSilentList[histpos]=nonzflag ? (mono_input_mode ? 1 : 2) : 0;
// mzfl is the minimum impulse-block flag that is still processed: 2 normally
// (blocks where only the second impulse channel has content are skipped), 1 in mono-input mode.
580 int mzfl=2;
581 if (mono_input_mode)
583 mzfl=1;
// The next channel's identical input block is consumed here; its output is produced together with this channel below.
585 m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
587 // save a valid copy in sample hist incase we switch from mono to stereo
588 if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
589 WDL_FFT_REAL *optr2 = m_samplehist[ch+1].Get()+m_hist_pos[ch+1]*m_fft_size*2;
590 memcpy(optr2,optr,m_fft_size*2*sizeof(WDL_FFT_REAL));
593 int applycnt=0;
594 char *useImpSilentList=m_impulse_zflag[srcc].GetSize() == nblocks ? m_impulse_zflag[srcc].Get() : NULL;
// Multiply impulse block i with the history block that is i blocks old and sum the products in workbuf2.
596 WDL_CONVO_IMPULSEBUFf *impulseptr=m_impulse[srcc].Get();
597 for (i = 0; i < nblocks; i ++, impulseptr+=m_fft_size*2)
599 int srchistpos = histpos-i;
600 if (srchistpos < 0) srchistpos += nblocks;
602 if (useImpSilentList && useImpSilentList[i]<mzfl) continue;
603 if (useSilentList && !useSilentList[srchistpos]) continue; // silent block
605 WDL_FFT_REAL *samplehist=m_samplehist[ch].Get() + m_fft_size*srchistpos*2;
// The first product overwrites workbuf2 (CplxMul2); later ones accumulate into it (CplxMul3).
607 if (applycnt++) // add to output
608 WDL_CONVO_CplxMul3((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
609 else // replace output
610 WDL_CONVO_CplxMul2((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
// Nothing contributed: the result is silence. Otherwise transform back (last argument 1; presumably the inverse transform - confirm against the FFT header).
613 if (!applycnt)
614 memset(workbuf2,0,m_fft_size*2*sizeof(WDL_FFT_REAL));
615 else
616 WDL_fft((WDL_FFT_COMPLEX*)workbuf2,m_fft_size,1);
// Overlap-add: p1 walks the first half of the result (output now), p3 the second half (saved in the
// overlap history for the next block). Results are compacted in place through p1o.
618 WDL_FFT_REAL *olhist=m_overlaphist[ch].Get(); // errors from last time
619 WDL_FFT_REAL *p1=workbuf2,*p3=workbuf2+m_fft_size,*p1o=workbuf2;
621 if (mono_impulse_mode||mono_input_mode)
// Two-channel result: even slots belong to channel ch, odd slots to channel ch+1.
// Channel ch+1 is assembled in the second half of workbuf2 (p2o).
623 WDL_FFT_REAL *p2o=workbuf2+m_fft_size*2;
624 WDL_FFT_REAL *olhist2=m_overlaphist[ch+1].Get(); // errors from last time
625 int s=sz/2;
626 while (s--)
628 p2o[0] = p1[1]+olhist2[0];
629 p2o[1] = p1[3]+olhist2[1];
630 p1o[0] = p1[0]+olhist[0];
631 p1o[1] = p1[2]+olhist[1];
632 p1o+=2;
633 p2o+=2;
634 p1+=4;
636 olhist[0]=p3[0];
637 olhist[1]=p3[2];
638 olhist2[0]=p3[1];
639 olhist2[1]=p3[3];
640 p3+=4;
642 olhist+=2;
643 olhist2+=2;
645 // add samples to output
646 m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
647 m_samplesout[ch+1].Add(workbuf2+m_fft_size*2,sz*sizeof(WDL_FFT_REAL));
649 else
// Single-channel result: only the even slots are used.
651 int s=sz/2;
652 while (s--)
654 p1o[0] = p1[0]+olhist[0];
655 p1o[1] = p1[2]+olhist[1];
656 p1o+=2;
657 p1+=4;
659 olhist[0]=p3[0];
660 olhist[1]=p3[2];
661 p3+=4;
663 olhist+=2;
665 // add samples to output
666 m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
668 } // while available
// In 2x mode channel ch+1 was processed together with ch, so skip it.
670 if (mono_impulse_mode) ch++;
// Result: the smallest number of queued output samples over all processing channels.
// mv is only left at `want` when the loop body never runs (m_proc_nch < 1).
673 int mv = want;
674 for (ch=0;ch<m_proc_nch;ch++)
676 int v = m_samplesout[ch].Available()/sizeof(WDL_FFT_REAL);
677 if (!ch || v<mv)mv=v;
679 return mv;
682 WDL_FFT_REAL **WDL_ConvolutionEngine::Get()
684 int x;
685 for (x = 0; x < m_proc_nch; x ++)
687 m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
689 return m_get_tmpptrs;
692 void WDL_ConvolutionEngine::Advance(int len)
694 int x;
695 for (x = 0; x < m_proc_nch; x ++)
697 m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
698 m_samplesout[x].Compact();
/****************************************************************
**  low latency version
*/
708 WDL_ConvolutionEngine_Div::WDL_ConvolutionEngine_Div()
710 timingInit();
711 m_proc_nch=2;
712 m_need_feedsilence=true;
715 int WDL_ConvolutionEngine_Div::SetImpulse(WDL_ImpulseBuffer *impulse, int maxfft_size, int known_blocksize, int max_imp_size, int impulse_offset, int latency_allowed)
717 m_need_feedsilence=true;
719 m_engines.Empty(true);
720 if (maxfft_size<0)maxfft_size=-maxfft_size;
721 maxfft_size*=2;
722 if (!maxfft_size || maxfft_size>32768) maxfft_size=32768;
725 const int MAX_SIZE_FOR_BRUTE=64;
727 int fftsize = MAX_SIZE_FOR_BRUTE;
728 int impulsechunksize = MAX_SIZE_FOR_BRUTE;
730 if (known_blocksize && !(known_blocksize&(known_blocksize-1)) && known_blocksize>MAX_SIZE_FOR_BRUTE*2)
732 fftsize=known_blocksize/2;
733 impulsechunksize=known_blocksize/2;
735 if (latency_allowed*2 > fftsize)
737 int x = 16;
738 while (x <= latency_allowed) x*=2;
739 if (x>32768) x=32768;
740 fftsize=impulsechunksize=x;
743 int offs=0;
744 int samplesleft=impulse->impulses[0].GetSize()-impulse_offset;
745 if (max_imp_size>0 && samplesleft>max_imp_size) samplesleft=max_imp_size;
749 WDL_ConvolutionEngine *eng=new WDL_ConvolutionEngine;
751 bool wantBrute = !latency_allowed && !offs;
752 if (impulsechunksize*(wantBrute ? 2 : 3) >= samplesleft) impulsechunksize=samplesleft; // early-out, no point going to a larger FFT (since if we did this, we wouldnt have enough samples for a complete next pass)
753 if (fftsize>=maxfft_size) { impulsechunksize=samplesleft; fftsize=maxfft_size; } // if FFTs are as large as possible, finish up
755 eng->SetImpulse(impulse,fftsize,offs+impulse_offset,impulsechunksize, wantBrute);
756 eng->m_zl_delaypos = offs;
757 eng->m_zl_dumpage=0;
758 m_engines.Add(eng);
760 #ifdef WDLCONVO_ZL_ACCOUNTING
761 char buf[512];
762 wsprintf(buf,"ce%d: offs=%d, len=%d, fftsize=%d\n",m_engines.GetSize(),offs,impulsechunksize,fftsize);
763 OutputDebugString(buf);
764 #endif
766 samplesleft -= impulsechunksize;
767 offs+=impulsechunksize;
769 #if 1 // this seems about 10% faster (maybe due to better cache use from less sized ffts used?)
770 impulsechunksize=offs*3;
771 fftsize=offs*2;
772 #else
773 impulsechunksize=fftsize;
775 fftsize*=2;
776 #endif
778 while (samplesleft > 0);
780 return GetLatency();
783 int WDL_ConvolutionEngine_Div::GetLatency()
785 return m_engines.GetSize() ? m_engines.Get(0)->GetLatency() : 0;
789 void WDL_ConvolutionEngine_Div::Reset()
791 int x;
792 for (x = 0; x < m_engines.GetSize(); x ++)
794 WDL_ConvolutionEngine *eng=m_engines.Get(x);
795 eng->Reset();
797 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
799 m_samplesout[x].Clear();
802 m_need_feedsilence=true;
805 WDL_ConvolutionEngine_Div::~WDL_ConvolutionEngine_Div()
807 timingPrint();
808 m_engines.Empty(true);
811 void WDL_ConvolutionEngine_Div::Add(WDL_FFT_REAL **bufs, int len, int nch)
813 m_proc_nch=nch;
815 bool ns=m_need_feedsilence;
816 m_need_feedsilence=false;
818 int x;
819 for (x = 0; x < m_engines.GetSize(); x ++)
821 WDL_ConvolutionEngine *eng=m_engines.Get(x);
822 if (ns)
824 eng->m_zl_dumpage = (x>0 && x < m_engines.GetSize()-1) ? (eng->GetLatency()/4) : 0; // reduce max number of ffts per block by staggering them
826 if (eng->m_zl_dumpage>0)
827 eng->Add(NULL,eng->m_zl_dumpage,nch); // added silence to input (to control when fft happens)
830 eng->Add(bufs,len,nch);
832 if (ns) eng->AddSilenceToOutput(eng->m_zl_delaypos,nch); // add silence to output (to delay output to its correct time)
836 WDL_FFT_REAL **WDL_ConvolutionEngine_Div::Get()
838 int x;
839 for (x = 0; x < m_proc_nch; x ++)
841 m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
843 return m_get_tmpptrs;
846 void WDL_ConvolutionEngine_Div::Advance(int len)
848 int x;
849 for (x = 0; x < m_proc_nch; x ++)
851 m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
852 m_samplesout[x].Compact();
// Runs every sub-engine, sums the output they have in common into m_samplesout and
// returns the number of samples available there for channel 0, capped at wantSamples as passed in.
856 int WDL_ConvolutionEngine_Div::Avail(int wantSamples)
858 timingEnter(1);
// wso keeps the caller's request; wantSamples itself is reduced below to what all engines can deliver.
859 int wso=wantSamples;
860 int x;
861 #ifdef WDLCONVO_ZL_ACCOUNTING
862 int cnt=0;
863 static int maxcnt=-1;
864 int h=0;
865 #endif
866 for (x = 0; x < m_engines.GetSize(); x ++)
868 WDL_ConvolutionEngine *eng=m_engines.Get(x);
869 #ifdef WDLCONVO_ZL_ACCOUNTING
870 eng->m_zl_fftcnt=0;
871 #endif
// m_zl_dumpage samples at the head of the engine's output are still to be dropped,
// so they are requested on top of wso and excluded from the usable count.
872 int a=eng->Avail(wso+eng->m_zl_dumpage) - eng->m_zl_dumpage;
873 #ifdef WDLCONVO_ZL_ACCOUNTING
874 cnt += !!eng->m_zl_fftcnt;
876 #if 0
877 if (eng->m_zl_fftcnt)
878 h|=1<<x;
880 if (eng->m_zl_fftcnt && x==m_engines.GetSize()-1 && cnt>1)
882 char buf[512];
883 wsprintf(buf,"fft flags=%08x (%08x=max)\n",h,1<<x);
884 OutputDebugString(buf);
886 #endif
887 #endif
888 if (a < wantSamples) wantSamples=a;
// Accounting builds only: log, at most about once per second, the largest number of engines that ran an FFT in one call.
891 #ifdef WDLCONVO_ZL_ACCOUNTING
892 static DWORD lastt=0;
893 if (cnt>maxcnt)maxcnt=cnt;
894 if (GetTickCount()>lastt+1000)
896 lastt=GetTickCount();
897 char buf[512];
898 wsprintf(buf,"maxcnt=%d\n",maxcnt);
899 OutputDebugString(buf);
900 maxcnt=-1;
902 #endif
// Mix stage: reserve wantSamples zeroed samples per channel in the combined queue, then add each engine's output.
903 if (wantSamples>0)
// NOTE(review): tp has WDL_CONVO_MAX_PROC_NCH entries but is indexed up to m_proc_nch, which Add() takes
// unchecked from its nch argument - confirm callers never pass more channels than the maximum.
905 WDL_FFT_REAL *tp[WDL_CONVO_MAX_PROC_NCH];
906 for (x =0; x < m_proc_nch; x ++)
908 memset(tp[x]=(WDL_FFT_REAL*)m_samplesout[x].Add(NULL,wantSamples*sizeof(WDL_FFT_REAL)),0,wantSamples*sizeof(WDL_FFT_REAL));
911 for (x = 0; x < m_engines.GetSize(); x ++)
913 WDL_ConvolutionEngine *eng=m_engines.Get(x);
// Drop the pending head samples once, then clear the counter so they are not dropped again.
914 if (eng->m_zl_dumpage>0) { eng->Advance(eng->m_zl_dumpage); eng->m_zl_dumpage=0; }
916 WDL_FFT_REAL **p=eng->Get();
917 if (p)
919 int i;
920 for (i =0; i < m_proc_nch; i ++)
922 WDL_FFT_REAL *o=tp[i];
923 WDL_FFT_REAL *in=p[i];
924 int j=wantSamples;
925 while (j-->0) *o++ += *in++;
928 eng->Advance(wantSamples);
931 timingLeave(1);
933 int av=m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
934 return av>wso ? wso : av;
#ifdef WDL_TEST_CONVO

#include <stdio.h>

// Self-test, built only when WDL_TEST_CONVO is defined (WDL_TEST_CONVO==2 selects the low latency engine).
// Convolves a unit impulse placed at sample `oneoffs` of an `implen`-sample impulse response with a
// unit ping at sample `pingoffs`, and prints every output sample that deviates from the expected
// result (a single 1.0 at pingoffs+oneoffs, 0.0 elsewhere).
// Usage: convoengine fftsize implen oneoffs pingoffs
// Returns 0 on completion, -1 on bad arguments or if no output could be obtained.
int main(int argc, char **argv)
{
  if (argc!=5)
  {
    printf("usage: convoengine fftsize implen oneoffs pingoffs\n");
    return -1;
  }

  const int fftsize=atoi(argv[1]);
  const int implen=atoi(argv[2]);
  const int oneoffs=atoi(argv[3]);
  const int pingoffs=atoi(argv[4]);

  if (implen < 1 || oneoffs < 0 || oneoffs >= implen || pingoffs < 0)
  {
    printf("invalid parameters\n");
    return -1;
  }

  // mono impulse response: all zeros except a 1.0 at oneoffs
  WDL_ImpulseBuffer imp;
  imp.SetNumChannels(1); // use the accessor rather than writing a channel-count member directly
  memset(imp.impulses[0].Resize(implen),0,implen*sizeof(WDL_FFT_REAL));
  imp.impulses[0].Get()[oneoffs]=1.0;

#if WDL_TEST_CONVO==2
  WDL_ConvolutionEngine_Div engine;
#else
  WDL_ConvolutionEngine engine;
#endif
  engine.SetImpulse(&imp,fftsize);

  // input: pingoffs zeros followed by a single 1.0
  WDL_TypedBuf<WDL_FFT_REAL> m_tmpbuf;
  memset(m_tmpbuf.Resize(pingoffs+1),0,pingoffs*sizeof(WDL_FFT_REAL));
  m_tmpbuf.Get()[pingoffs]=1.0;
  WDL_FFT_REAL *p=m_tmpbuf.Get();
  engine.Add(&p,pingoffs+1,1);

  // keep feeding silence until enough output has been produced
  p=m_tmpbuf.Resize(4096);
  memset(p,0,m_tmpbuf.GetSize()*sizeof(WDL_FFT_REAL));

  const int wanted = pingoffs+oneoffs + 8192;
  int avail;
  while ((avail=engine.Avail(wanted)) < wanted)
  {
    engine.Add(&p,4096,1);
  }

  WDL_FFT_REAL **output = engine.Get();
  if (!output || !*output)
  {
    printf("cant get output\n");
    return -1;
  }

  int x;
  for (x = 0; x < avail; x ++)
  {
    const WDL_FFT_REAL val=output[0][x];
    const WDL_FFT_REAL expval = (x==pingoffs+oneoffs) ? 1.0:0.0;
    if (fabs(val-expval)>0.000000001)
    {
      // clamp to a small positive floor before taking the log; written out explicitly
      // so the test does not depend on a platform-provided max() macro
      const double val_floor = val > 0.000000000001 ? val : 0.000000000001;
      const double exp_floor = expval > 0.000000000001 ? expval : 0.000000000001;
      printf("%d: %.4fdB - %f %f\n",x,log10(val_floor)*20.0 - log10(exp_floor)*20.0,val,expval);
    }
  }

  return 0;
}

#endif
1010 int WDL_ImpulseBuffer::SetLength(int samples)
1012 int x;
1013 for(x=0;x<m_nch;x++)
1015 int cursz=impulses[x].GetSize();
1016 if (cursz!=samples)
1018 impulses[x].Resize(samples,false);
1020 if (impulses[x].GetSize()!=samples) // validate length!
1022 // ERROR! FREE ALL!
1023 for(x=0;x<WDL_CONVO_MAX_IMPULSE_NCH;x++) impulses[x].Resize(0);
1024 return 0;
1028 if (cursz<samples)
1029 memset(impulses[x].Get()+cursz,0,(samples-cursz)*sizeof(WDL_FFT_REAL));
1031 return impulses[0].GetSize();
// Changes the number of impulse channels.
// usench is clamped to the range 1..WDL_CONVO_MAX_IMPULSE_NCH.
// Growing: the new channels are sized like the existing ones and filled with copies of the old channels.
// Shrinking: every channel buffer from index usench upwards is resized to 0.
1035 void WDL_ImpulseBuffer::SetNumChannels(int usench)
1037 if (usench<1) usench=1;
1038 else if (usench>WDL_CONVO_MAX_IMPULSE_NCH) usench=WDL_CONVO_MAX_IMPULSE_NCH;
1040 if (usench > m_nch)
1042 const int old_nch = m_nch;
// m_nch is raised first so that SetLength() also resizes the newly added channels.
1043 m_nch = usench;
1044 const int len = SetLength(GetLength());
// ax cycles through the old channels (0..old_nch-1) as the copy source for each new channel.
// Nothing is copied when SetLength() reported 0.
1046 int x,ax=0;
1047 if (len>0) for(x=old_nch;x<usench;x++)
1049 memcpy(impulses[x].Get(),impulses[ax].Get(),len*sizeof(WDL_FFT_REAL)); // duplicate channels
1050 if (++ax>=old_nch) ax=0;
1053 else if (usench<m_nch)
1055 m_nch=usench;
1056 int x;
1057 for(x=usench;x<WDL_CONVO_MAX_IMPULSE_NCH;x++) impulses[x].Resize(0,false);