[IPLUG/EXAMPLES] IPlugResampler: qualification of min no longer needed
[wdl/wdl-ol.git] / WDL / convoengine.cpp
blob204ed35e1678d46bd8cb13dfa46fbaa8152f0a9b
/*
  WDL - convoengine.cpp
  Copyright (C) 2006 and later Cockos Incorporated

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
24 #ifdef _WIN32
25 #include <windows.h>
26 #endif
27 #include <math.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <memory.h>
31 #include "convoengine.h"
33 #include "denormal.h"
35 //#define TIMING
36 #include "timing.c"
38 static void WDL_CONVO_CplxMul2(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
40 WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
41 if (n<2 || (n&1)) return;
43 do {
44 t1 = a[0].re * b[0].re;
45 t2 = a[0].im * b[0].im;
46 t3 = a[0].im * b[0].re;
47 t4 = a[0].re * b[0].im;
48 t5 = a[1].re * b[1].re;
49 t6 = a[1].im * b[1].im;
50 t7 = a[1].im * b[1].re;
51 t8 = a[1].re * b[1].im;
52 t1 -= t2;
53 t3 += t4;
54 t5 -= t6;
55 t7 += t8;
56 c[0].re = t1;
57 c[1].re = t5;
58 c[0].im = t3;
59 c[1].im = t7;
60 a += 2;
61 b += 2;
62 c += 2;
63 } while (n -= 2);
65 static void WDL_CONVO_CplxMul3(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
67 WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
68 if (n<2 || (n&1)) return;
70 do {
71 t1 = a[0].re * b[0].re;
72 t2 = a[0].im * b[0].im;
73 t3 = a[0].im * b[0].re;
74 t4 = a[0].re * b[0].im;
75 t5 = a[1].re * b[1].re;
76 t6 = a[1].im * b[1].im;
77 t7 = a[1].im * b[1].re;
78 t8 = a[1].re * b[1].im;
79 t1 -= t2;
80 t3 += t4;
81 t5 -= t6;
82 t7 += t8;
83 c[0].re += t1;
84 c[1].re += t5;
85 c[0].im += t3;
86 c[1].im += t7;
87 a += 2;
88 b += 2;
89 c += 2;
90 } while (n -= 2);
93 static bool CompareQueueToBuf(WDL_FastQueue *q, const void *data, int len)
95 int offs=0;
96 while (len>0)
98 void *td=NULL;
99 int sz=q->GetPtr(offs,&td);
100 if (sz<1) return true; // not enough data = not equal!
101 if (sz>len) sz=len;
103 int i=sz/sizeof(WDL_FFT_REAL);
104 WDL_FFT_REAL *a1=(WDL_FFT_REAL*)td;
105 WDL_FFT_REAL *b1=(WDL_FFT_REAL*)data;
106 while (i--)
108 if (fabs(*a1-*b1)>1.0e-7) return true;
109 a1++;
110 b1++;
113 data = ((char *)data)+sz;
114 offs+=sz;
115 len-=sz;
117 return false;
121 WDL_ConvolutionEngine::WDL_ConvolutionEngine()
123 WDL_fft_init();
124 m_impulse_nch=1;
125 m_fft_size=0;
126 m_impulse_len=0;
127 m_proc_nch=0;
130 WDL_ConvolutionEngine::~WDL_ConvolutionEngine()
134 int WDL_ConvolutionEngine::SetImpulse(WDL_ImpulseBuffer *impulse, int fft_size, int impulse_sample_offset, int max_imp_size, bool forceBrute)
136 int impulse_len=0;
137 int x;
138 int nch=impulse->GetNumChannels();
139 for (x = 0; x < nch; x ++)
141 int l=impulse->impulses[x].GetSize()-impulse_sample_offset;
142 if (max_imp_size && l>max_imp_size) l=max_imp_size;
143 if (impulse_len < l) impulse_len=l;
145 m_impulse_nch=nch;
147 if (m_impulse_nch>1) // detect mono signals pretending to be multichannel
149 for (x = 1; x < m_impulse_nch; x ++)
151 if (impulse->impulses[x].GetSize()!=impulse->impulses[0].GetSize()||
152 memcmp(impulse->impulses[x].Get(),impulse->impulses[0].Get(),
153 impulse->impulses[0].GetSize()*sizeof(WDL_FFT_REAL)))
154 break;
156 if (x >= m_impulse_nch) m_impulse_nch=1;
159 m_impulse_len=impulse_len;
160 m_proc_nch=-1;
163 if (forceBrute)
165 m_fft_size=0;
167 // save impulse
168 for (x = 0; x < m_impulse_nch; x ++)
170 WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
171 int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
172 if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
174 WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize(lenout)+lenout;
175 while (lenout-->0) *--impout = (WDL_CONVO_IMPULSEBUFf) *imp++;
178 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
180 m_samplesin[x].Clear();
181 m_samplesin2[x].Clear();
182 m_samplesout[x].Clear();
185 return 0;
189 if (fft_size<=0)
191 int msz=fft_size<=-16? -fft_size*2 : 32768;
193 fft_size=32;
194 while (fft_size < impulse_len*2 && fft_size < msz) fft_size*=2;
197 m_fft_size=fft_size;
199 int impchunksize=fft_size/2;
200 int nblocks=(impulse_len+impchunksize-1)/impchunksize;
201 //char buf[512];
202 //sprintf(buf,"il=%d, ffts=%d, cs=%d, nb=%d\n",impulse_len,fft_size,impchunksize,nblocks);
203 //OutputDebugString(buf);
205 const bool smallerSizeMode=sizeof(WDL_CONVO_IMPULSEBUFf)!=sizeof(WDL_FFT_REAL);
207 WDL_FFT_REAL scale=(WDL_FFT_REAL) (1.0/fft_size);
208 for (x = 0; x < m_impulse_nch; x ++)
210 WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
212 WDL_FFT_REAL *imp2=x < m_impulse_nch-1 ? impulse->impulses[x+1].Get()+impulse_sample_offset : NULL;
214 WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize((nblocks+!!smallerSizeMode)*fft_size*2);
215 char *zbuf=m_impulse_zflag[x].Resize(nblocks);
216 int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
217 if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
219 int bl;
220 for (bl = 0; bl < nblocks; bl ++)
223 int thissz=lenout;
224 if (thissz > impchunksize) thissz=impchunksize;
226 lenout -= thissz;
227 int i=0;
228 WDL_FFT_REAL mv=0.0;
229 WDL_FFT_REAL mv2=0.0;
230 WDL_FFT_REAL *imptmp = (WDL_FFT_REAL *)impout; //-V615
232 for (; i < thissz; i ++)
234 WDL_FFT_REAL v=*imp++;
235 WDL_FFT_REAL v2=(WDL_FFT_REAL)fabs(v);
236 if (v2 > mv) mv=v2;
238 imptmp[i*2]=denormal_filter_aggressive(v * scale);
240 if (imp2)
242 v=*imp2++;
243 v2=(WDL_FFT_REAL)fabs(v);
244 if (v2>mv2) mv2=v2;
245 imptmp[i*2+1]=denormal_filter_aggressive(v*scale);
247 else imptmp[i*2+1]=0.0;
249 for (; i < fft_size; i ++)
251 imptmp[i*2]=0.0;
252 imptmp[i*2+1]=0.0;
254 if (mv>1.0e-14||mv2>1.0e-14)
256 *zbuf++=mv>1.0e-14 ? 2 : 1; // 1 means only second channel has content
257 WDL_fft((WDL_FFT_COMPLEX*)impout,fft_size,0);
259 if (smallerSizeMode)
261 int x,n=fft_size*2;
262 for(x=0;x<n;x++) impout[x]=(WDL_CONVO_IMPULSEBUFf)imptmp[x];
265 else *zbuf++=0;
267 impout+=fft_size*2;
270 return m_fft_size/2;
274 void WDL_ConvolutionEngine::Reset() // clears out any latent samples
276 int x;
277 memset(m_hist_pos,0,sizeof(m_hist_pos));
278 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
280 m_samplesin[x].Clear();
281 m_samplesin2[x].Clear();
282 m_samplesout[x].Clear();
283 memset(m_samplehist_zflag[x].Get(),0,m_samplehist_zflag[x].GetSize());
284 memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
285 memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
289 void WDL_ConvolutionEngine::Add(WDL_FFT_REAL **bufs, int len, int nch)
291 if (m_fft_size<1)
293 int ch;
294 m_proc_nch=nch;
295 for (ch = 0; ch < nch; ch ++)
297 int wch=ch;
298 if (wch >=m_impulse_nch) wch-=m_impulse_nch;
299 WDL_CONVO_IMPULSEBUFf *imp=m_impulse[wch].Get();
300 int imp_len = m_impulse[wch].GetSize();
303 if (imp_len>0)
305 if (m_samplesin2[ch].Available()<imp_len*(int)sizeof(WDL_FFT_REAL))
307 int sza=imp_len*sizeof(WDL_FFT_REAL)-m_samplesin2[ch].Available();
308 memset(m_samplesin2[ch].Add(NULL,sza),0,sza);
310 WDL_FFT_REAL *psrc;
312 if (bufs && bufs[ch])
313 psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
314 else
316 psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
317 memset(psrc,0,len*sizeof(WDL_FFT_REAL));
320 WDL_FFT_REAL *pout=(WDL_FFT_REAL*)m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
321 int x;
322 int len1 = len&~1;
323 for (x=0; x < len1 ; x += 2)
325 int i=imp_len;
326 double sum=0.0,sum2=0.0;
327 WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
328 WDL_CONVO_IMPULSEBUFf *ip=imp;
329 int j=i/4; i&=3;
330 while (j--) // produce 2 samples, 4 impulse samples at a time
332 double a = ip[0],b=ip[1],aa=ip[2],bb=ip[3];
333 double c = sp[1],d=sp[2],cc=sp[3];
334 sum+=a * sp[0] + b * c + aa * d + bb * cc;
335 sum2+=a * c + b * d + aa * cc + bb * sp[4];
336 ip+=4;
337 sp+=4;
340 while (i--)
342 double a = *ip++;
343 sum+=a * sp[0];
344 sum2+=a * sp[1];
345 sp++;
347 pout[x]=(WDL_FFT_REAL) sum;
348 pout[x+1]=(WDL_FFT_REAL) sum2;
350 for(;x<len;x++) // any odd samples left
352 int i=imp_len;
353 double sum=0.0;
354 WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
355 WDL_CONVO_IMPULSEBUFf *ip=imp;
356 int j=i/4; i&=3;
357 while (j--)
359 sum+=ip[0] * sp[0] + ip[1] * sp[1] + ip[2] * sp[2] + ip[3] * sp[3];
360 ip+=4;
361 sp+=4;
364 while (i--) sum+=*ip++ * *sp++;
365 pout[x]=(WDL_FFT_REAL) sum;
367 m_samplesin2[ch].Advance(len*sizeof(WDL_FFT_REAL));
368 m_samplesin2[ch].Compact();
370 else
372 if (bufs && bufs[ch]) m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
373 else
375 memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
380 return;
384 int impchunksize=m_fft_size/2;
385 int nblocks=(m_impulse_len+impchunksize-1)/impchunksize;
387 if (m_proc_nch != nch)
389 m_proc_nch=nch;
390 memset(m_hist_pos,0,sizeof(m_hist_pos));
391 int x;
392 int mso=0;
393 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
395 int so=m_samplesin[x].Available() + m_samplesout[x].Available();
396 if (so>mso) mso=so;
398 if (x>=nch)
400 m_samplesin[x].Clear();
401 m_samplesout[x].Clear();
403 else
405 if (m_impulse_len<1||!nblocks)
407 if (m_samplesin[x].Available())
409 int s=m_samplesin[x].Available();
410 void *buf=m_samplesout[x].Add(NULL,s);
411 m_samplesin[x].GetToBuf(0,buf,s);
412 m_samplesin[x].Clear();
416 if (so < mso)
418 memset(m_samplesout[x].Add(NULL,mso-so),0,mso-so);
422 int sz=0;
423 if (x<nch) sz=nblocks*m_fft_size;
425 memset(m_samplehist_zflag[x].Resize(nblocks),0,nblocks);
426 m_samplehist[x].Resize(sz*2);
427 m_overlaphist[x].Resize(x<nch ? m_fft_size/2 : 0);
428 memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
429 memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
433 int ch;
434 if (m_impulse_len<1||!nblocks)
436 for (ch = 0; ch < nch; ch ++)
438 if (bufs && bufs[ch])
439 m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
440 else
441 memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
443 // pass through
444 return;
447 for (ch = 0; ch < nch; ch ++)
449 if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
451 m_samplesin[ch].Add(bufs ? bufs[ch] : NULL,len*sizeof(WDL_FFT_REAL));
456 void WDL_ConvolutionEngine::AddSilenceToOutput(int len, int nch)
458 int x;
459 for(x=0;x<nch&&x<m_proc_nch;x++)
461 memset(m_samplesout[x].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
465 int WDL_ConvolutionEngine::Avail(int want)
467 if (m_fft_size<1)
469 return m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
472 const int sz=m_fft_size/2;
473 const int chunksize=m_fft_size/2;
474 const int nblocks=(m_impulse_len+chunksize-1)/chunksize;
475 // clear combining buffer
476 WDL_FFT_REAL *workbuf2 = m_combinebuf.Resize(m_fft_size*4); // temp space
478 int ch;
480 for (ch = 0; ch < m_proc_nch; ch ++)
482 if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
483 int srcc=ch;
484 if (srcc>=m_impulse_nch) srcc=m_impulse_nch-1;
486 bool allow_mono_input_mode=true;
487 bool mono_impulse_mode=false;
489 if (m_impulse_nch==1 && ch<m_proc_nch-1 &&
490 m_samplehist[ch+1].GetSize()&&m_overlaphist[ch+1].GetSize() &&
491 m_samplesin[ch].Available()==m_samplesin[ch+1].Available() &&
492 m_samplesout[ch].Available()==m_samplesout[ch+1].Available()
494 { // 2x processing mode
495 mono_impulse_mode=true;
496 allow_mono_input_mode=false;
500 const int in_needed=sz;
502 // useSilentList[x] = 1 for mono signal, 2 for stereo, 0 for silent
503 char *useSilentList=m_samplehist_zflag[ch].GetSize()==nblocks ? m_samplehist_zflag[ch].Get() : NULL;
504 while (m_samplesin[ch].Available()/(int)sizeof(WDL_FFT_REAL) >= sz &&
505 m_samplesout[ch].Available() < want*(int)sizeof(WDL_FFT_REAL))
507 int histpos;
508 if ((histpos=++m_hist_pos[ch]) >= nblocks) histpos=m_hist_pos[ch]=0;
510 // get samples from input, to history
511 WDL_FFT_REAL *optr = m_samplehist[ch].Get()+histpos*m_fft_size*2;
513 m_samplesin[ch].GetToBuf(0,optr+sz,in_needed*sizeof(WDL_FFT_REAL));
514 m_samplesin[ch].Advance(in_needed*sizeof(WDL_FFT_REAL));
517 bool mono_input_mode=false;
519 bool nonzflag=false;
520 if (mono_impulse_mode)
522 if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
523 m_samplesin[ch+1].GetToBuf(0,workbuf2,sz*sizeof(WDL_FFT_REAL));
524 m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
525 int i;
526 for (i = 0; i < sz; i ++) // unpack samples
528 WDL_FFT_REAL f = optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
529 if (!nonzflag && (f<-1.0e-6 || f>1.0e-6)) nonzflag=true;
530 f=optr[i*2+1]=denormal_filter_aggressive(workbuf2[i]);
531 if (!nonzflag && (f<-1.0e-6 || f>1.0e-6)) nonzflag=true;
534 else
536 if (allow_mono_input_mode &&
537 ch < m_proc_nch-1 &&
538 srcc<m_impulse_nch-1 &&
539 !CompareQueueToBuf(&m_samplesin[ch+1],optr+sz,sz*sizeof(WDL_FFT_REAL))
542 mono_input_mode=true;
544 else
546 allow_mono_input_mode=false;
549 int i;
550 for (i = 0; i < sz; i ++) // unpack samples
552 WDL_FFT_REAL f=optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
553 optr[i*2+1]=0.0;
554 if (!nonzflag && (f<-1.0e-6 || f>1.0e-6)) nonzflag=true;
558 int i;
559 for (i = 1; mono_input_mode && i < nblocks; i ++) // start @ 1, since hist[histpos] is no longer used for here
561 int srchistpos = histpos-i;
562 if (srchistpos < 0) srchistpos += nblocks;
563 if (useSilentList[srchistpos]==2) mono_input_mode=false;
566 if (nonzflag||!useSilentList) memset(optr+sz*2,0,sz*2*sizeof(WDL_FFT_REAL));
569 #ifdef WDLCONVO_ZL_ACCOUNTING
570 m_zl_fftcnt++;
571 #endif
573 if (nonzflag) WDL_fft((WDL_FFT_COMPLEX*)optr,m_fft_size,0);
575 if (useSilentList) useSilentList[histpos]=nonzflag ? (mono_input_mode ? 1 : 2) : 0;
577 int mzfl=2;
578 if (mono_input_mode)
580 mzfl=1;
582 m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
584 // save a valid copy in sample hist incase we switch from mono to stereo
585 if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
586 WDL_FFT_REAL *optr2 = m_samplehist[ch+1].Get()+m_hist_pos[ch+1]*m_fft_size*2;
587 memcpy(optr2,optr,m_fft_size*2*sizeof(WDL_FFT_REAL));
590 int applycnt=0;
591 char *useImpSilentList=m_impulse_zflag[srcc].GetSize() == nblocks ? m_impulse_zflag[srcc].Get() : NULL;
593 WDL_CONVO_IMPULSEBUFf *impulseptr=m_impulse[srcc].Get();
594 for (i = 0; i < nblocks; i ++, impulseptr+=m_fft_size*2)
596 int srchistpos = histpos-i;
597 if (srchistpos < 0) srchistpos += nblocks;
599 if (useImpSilentList && useImpSilentList[i]<mzfl) continue;
600 if (useSilentList && !useSilentList[srchistpos]) continue; // silent block
602 WDL_FFT_REAL *samplehist=m_samplehist[ch].Get() + m_fft_size*srchistpos*2;
604 if (applycnt++) // add to output
605 WDL_CONVO_CplxMul3((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
606 else // replace output
607 WDL_CONVO_CplxMul2((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
610 if (!applycnt)
611 memset(workbuf2,0,m_fft_size*2*sizeof(WDL_FFT_REAL));
612 else
613 WDL_fft((WDL_FFT_COMPLEX*)workbuf2,m_fft_size,1);
615 WDL_FFT_REAL *olhist=m_overlaphist[ch].Get(); // errors from last time
616 WDL_FFT_REAL *p1=workbuf2,*p3=workbuf2+m_fft_size,*p1o=workbuf2;
618 if (mono_impulse_mode||mono_input_mode)
620 WDL_FFT_REAL *p2o=workbuf2+m_fft_size*2;
621 WDL_FFT_REAL *olhist2=m_overlaphist[ch+1].Get(); // errors from last time
622 int s=sz/2;
623 while (s--)
625 p2o[0] = p1[1]+olhist2[0];
626 p2o[1] = p1[3]+olhist2[1];
627 p1o[0] = p1[0]+olhist[0];
628 p1o[1] = p1[2]+olhist[1];
629 p1o+=2;
630 p2o+=2;
631 p1+=4;
633 olhist[0]=p3[0];
634 olhist[1]=p3[2];
635 olhist2[0]=p3[1];
636 olhist2[1]=p3[3];
637 p3+=4;
639 olhist+=2;
640 olhist2+=2;
642 // add samples to output
643 m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
644 m_samplesout[ch+1].Add(workbuf2+m_fft_size*2,sz*sizeof(WDL_FFT_REAL));
646 else
648 int s=sz/2;
649 while (s--)
651 p1o[0] = p1[0]+olhist[0];
652 p1o[1] = p1[2]+olhist[1];
653 p1o+=2;
654 p1+=4;
656 olhist[0]=p3[0];
657 olhist[1]=p3[2];
658 p3+=4;
660 olhist+=2;
662 // add samples to output
663 m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
665 } // while available
667 if (mono_impulse_mode) ch++;
670 int mv = want;
671 for (ch=0;ch<m_proc_nch;ch++)
673 int v = m_samplesout[ch].Available()/sizeof(WDL_FFT_REAL);
674 if (!ch || v<mv)mv=v;
676 return mv;
679 WDL_FFT_REAL **WDL_ConvolutionEngine::Get()
681 int x;
682 for (x = 0; x < m_proc_nch; x ++)
684 m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
686 return m_get_tmpptrs;
689 void WDL_ConvolutionEngine::Advance(int len)
691 int x;
692 for (x = 0; x < m_proc_nch; x ++)
694 m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
695 m_samplesout[x].Compact();
/****************************************************************
**  low latency version
*/
705 WDL_ConvolutionEngine_Div::WDL_ConvolutionEngine_Div()
707 timingInit();
708 m_proc_nch=2;
709 m_need_feedsilence=true;
712 int WDL_ConvolutionEngine_Div::SetImpulse(WDL_ImpulseBuffer *impulse, int maxfft_size, int known_blocksize, int max_imp_size, int impulse_offset, int latency_allowed)
714 m_need_feedsilence=true;
716 m_engines.Empty(true);
717 if (maxfft_size<0)maxfft_size=-maxfft_size;
718 maxfft_size*=2;
719 if (!maxfft_size || maxfft_size>32768) maxfft_size=32768;
722 const int MAX_SIZE_FOR_BRUTE=64;
724 int fftsize = MAX_SIZE_FOR_BRUTE;
725 int impulsechunksize = MAX_SIZE_FOR_BRUTE;
727 if (known_blocksize && !(known_blocksize&(known_blocksize-1)) && known_blocksize>MAX_SIZE_FOR_BRUTE*2)
729 fftsize=known_blocksize/2;
730 impulsechunksize=known_blocksize/2;
732 if (latency_allowed*2 > fftsize)
734 int x = 16;
735 while (x <= latency_allowed) x*=2;
736 if (x>32768) x=32768;
737 fftsize=impulsechunksize=x;
740 int offs=0;
741 int samplesleft=impulse->impulses[0].GetSize()-impulse_offset;
742 if (max_imp_size>0 && samplesleft>max_imp_size) samplesleft=max_imp_size;
746 WDL_ConvolutionEngine *eng=new WDL_ConvolutionEngine;
748 bool wantBrute = !latency_allowed && !offs;
749 if (impulsechunksize*(wantBrute ? 2 : 3) >= samplesleft) impulsechunksize=samplesleft; // early-out, no point going to a larger FFT (since if we did this, we wouldnt have enough samples for a complete next pass)
750 if (fftsize>=maxfft_size) { impulsechunksize=samplesleft; fftsize=maxfft_size; } // if FFTs are as large as possible, finish up
752 eng->SetImpulse(impulse,fftsize,offs+impulse_offset,impulsechunksize, wantBrute);
753 eng->m_zl_delaypos = offs;
754 eng->m_zl_dumpage=0;
755 m_engines.Add(eng);
757 #ifdef WDLCONVO_ZL_ACCOUNTING
758 char buf[512];
759 wsprintf(buf,"ce%d: offs=%d, len=%d, fftsize=%d\n",m_engines.GetSize(),offs,impulsechunksize,fftsize);
760 OutputDebugString(buf);
761 #endif
763 samplesleft -= impulsechunksize;
764 offs+=impulsechunksize;
766 #if 1 // this seems about 10% faster (maybe due to better cache use from less sized ffts used?)
767 impulsechunksize=offs*3;
768 fftsize=offs*2;
769 #else
770 impulsechunksize=fftsize;
772 fftsize*=2;
773 #endif
775 while (samplesleft > 0);
777 return GetLatency();
780 int WDL_ConvolutionEngine_Div::GetLatency()
782 return m_engines.GetSize() ? m_engines.Get(0)->GetLatency() : 0;
786 void WDL_ConvolutionEngine_Div::Reset()
788 int x;
789 for (x = 0; x < m_engines.GetSize(); x ++)
791 WDL_ConvolutionEngine *eng=m_engines.Get(x);
792 eng->Reset();
794 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
796 m_samplesout[x].Clear();
799 m_need_feedsilence=true;
802 WDL_ConvolutionEngine_Div::~WDL_ConvolutionEngine_Div()
804 timingPrint();
805 m_engines.Empty(true);
808 void WDL_ConvolutionEngine_Div::Add(WDL_FFT_REAL **bufs, int len, int nch)
810 m_proc_nch=nch;
812 bool ns=m_need_feedsilence;
813 m_need_feedsilence=false;
815 int x;
816 for (x = 0; x < m_engines.GetSize(); x ++)
818 WDL_ConvolutionEngine *eng=m_engines.Get(x);
819 if (ns)
821 eng->m_zl_dumpage = (x>0 && x < m_engines.GetSize()-1) ? (eng->GetLatency()/4) : 0; // reduce max number of ffts per block by staggering them
823 if (eng->m_zl_dumpage>0)
824 eng->Add(NULL,eng->m_zl_dumpage,nch); // added silence to input (to control when fft happens)
827 eng->Add(bufs,len,nch);
829 if (ns) eng->AddSilenceToOutput(eng->m_zl_delaypos,nch); // add silence to output (to delay output to its correct time)
833 WDL_FFT_REAL **WDL_ConvolutionEngine_Div::Get()
835 int x;
836 for (x = 0; x < m_proc_nch; x ++)
838 m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
840 return m_get_tmpptrs;
843 void WDL_ConvolutionEngine_Div::Advance(int len)
845 int x;
846 for (x = 0; x < m_proc_nch; x ++)
848 m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
849 m_samplesout[x].Compact();
853 int WDL_ConvolutionEngine_Div::Avail(int wantSamples)
855 timingEnter(1);
856 int wso=wantSamples;
857 int x;
858 #ifdef WDLCONVO_ZL_ACCOUNTING
859 int cnt=0;
860 static int maxcnt=-1;
861 int h=0;
862 #endif
863 for (x = 0; x < m_engines.GetSize(); x ++)
865 WDL_ConvolutionEngine *eng=m_engines.Get(x);
866 #ifdef WDLCONVO_ZL_ACCOUNTING
867 eng->m_zl_fftcnt=0;
868 #endif
869 int a=eng->Avail(wso+eng->m_zl_dumpage) - eng->m_zl_dumpage;
870 #ifdef WDLCONVO_ZL_ACCOUNTING
871 cnt += !!eng->m_zl_fftcnt;
873 #if 0
874 if (eng->m_zl_fftcnt)
875 h|=1<<x;
877 if (eng->m_zl_fftcnt && x==m_engines.GetSize()-1 && cnt>1)
879 char buf[512];
880 wsprintf(buf,"fft flags=%08x (%08x=max)\n",h,1<<x);
881 OutputDebugString(buf);
883 #endif
884 #endif
885 if (a < wantSamples) wantSamples=a;
888 #ifdef WDLCONVO_ZL_ACCOUNTING
889 static DWORD lastt=0;
890 if (cnt>maxcnt)maxcnt=cnt;
891 if (GetTickCount()>lastt+1000)
893 lastt=GetTickCount();
894 char buf[512];
895 wsprintf(buf,"maxcnt=%d\n",maxcnt);
896 OutputDebugString(buf);
897 maxcnt=-1;
899 #endif
900 if (wantSamples>0)
902 WDL_FFT_REAL *tp[WDL_CONVO_MAX_PROC_NCH];
903 for (x =0; x < m_proc_nch; x ++)
905 memset(tp[x]=(WDL_FFT_REAL*)m_samplesout[x].Add(NULL,wantSamples*sizeof(WDL_FFT_REAL)),0,wantSamples*sizeof(WDL_FFT_REAL));
908 for (x = 0; x < m_engines.GetSize(); x ++)
910 WDL_ConvolutionEngine *eng=m_engines.Get(x);
911 if (eng->m_zl_dumpage>0) { eng->Advance(eng->m_zl_dumpage); eng->m_zl_dumpage=0; }
913 WDL_FFT_REAL **p=eng->Get();
914 if (p)
916 int i;
917 for (i =0; i < m_proc_nch; i ++)
919 WDL_FFT_REAL *o=tp[i];
920 WDL_FFT_REAL *in=p[i];
921 int j=wantSamples;
922 while (j-->0) *o++ += *in++;
925 eng->Advance(wantSamples);
928 timingLeave(1);
930 int av=m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
931 return av>wso ? wso : av;
#ifdef WDL_TEST_CONVO

#include <stdio.h>

// Self-test: convolves a single-sample ping (at pingoffs) with an impulse that
// has a single 1.0 (at oneoffs) and prints every output sample that is not
// within 1e-9 of the expected value (1.0 at pingoffs+oneoffs, 0.0 elsewhere).
// Build with WDL_TEST_CONVO==2 to test WDL_ConvolutionEngine_Div instead.
int main(int argc, char **argv)
{
  if (argc!=5)
  {
    printf("usage: convoengine fftsize implen oneoffs pingoffs\n");
    return -1;
  }

  const int fftsize = atoi(argv[1]);
  const int implen = atoi(argv[2]);
  const int oneoffs = atoi(argv[3]);
  const int pingoffs = atoi(argv[4]);

  const bool params_ok = implen >= 1 && oneoffs >= 0 && oneoffs < implen && pingoffs >= 0;
  if (!params_ok)
  {
    printf("invalid parameters\n");
    return -1;
  }

  // impulse: all zero except a 1.0 at oneoffs
  WDL_ImpulseBuffer imp;
  imp.nch=1;
  memset(imp.impulses[0].Resize(implen),0,implen*sizeof(WDL_FFT_REAL));
  imp.impulses[0].Get()[oneoffs]=1.0;

#if WDL_TEST_CONVO==2
  WDL_ConvolutionEngine_Div engine;
#else
  WDL_ConvolutionEngine engine;
#endif
  engine.SetImpulse(&imp,fftsize);

  // input: pingoffs zeros followed by a single 1.0
  WDL_TypedBuf<WDL_FFT_REAL> tmpbuf;
  memset(tmpbuf.Resize(pingoffs+1),0,pingoffs*sizeof(WDL_FFT_REAL));
  tmpbuf.Get()[pingoffs]=1.0;
  WDL_FFT_REAL *p=tmpbuf.Get();
  engine.Add(&p,pingoffs+1,1);

  // from here on feed blocks of silence until enough output is available
  p=tmpbuf.Resize(4096);
  memset(p,0,tmpbuf.GetSize()*sizeof(WDL_FFT_REAL));

  int avail;
  for (;;)
  {
    avail = engine.Avail(pingoffs+oneoffs + 8192);
    if (!(avail < pingoffs+oneoffs + 8192)) break;
    engine.Add(&p,4096,1);
  }

  WDL_FFT_REAL **output = engine.Get();
  if (!output || !*output)
  {
    printf("cant get output\n");
    return -1;
  }

  for (int x = 0; x < avail; x++)
  {
    const WDL_FFT_REAL val = output[0][x];
    const WDL_FFT_REAL expval = (x==pingoffs+oneoffs) ? 1.0:0.0;
    if (fabs(val-expval)>0.000000001)
    {
      printf("%d: %.4fdB - %f %f\n",x,log10(max(val,0.000000000001))*20.0 - log10(max(expval,0.000000000001))*20.0,val,expval);
    }
  }

  return 0;
}

#endif
1007 int WDL_ImpulseBuffer::SetLength(int samples)
1009 int x;
1010 for(x=0;x<m_nch;x++)
1012 int cursz=impulses[x].GetSize();
1013 if (cursz!=samples)
1015 impulses[x].Resize(samples,false);
1017 if (impulses[x].GetSize()!=samples) // validate length!
1019 // ERROR! FREE ALL!
1020 for(x=0;x<m_nch;x++) impulses[x].Resize(0);
1021 return 0;
1025 if (cursz<samples)
1026 memset(impulses[x].Get()+cursz,0,(samples-cursz)*sizeof(WDL_FFT_REAL));
1028 return impulses[0].GetSize();
1032 void WDL_ImpulseBuffer::SetNumChannels(int usench)
1034 if (usench<1) usench=1;
1035 else if (usench>WDL_CONVO_MAX_IMPULSE_NCH) usench=WDL_CONVO_MAX_IMPULSE_NCH;
1037 if (usench > m_nch)
1039 int len = GetLength();
1040 int x,ax=0;
1041 for(x=m_nch;x<usench;x++)
1043 WDL_FFT_REAL *ptr=impulses[x].Resize(len,false);
1044 if (ax<x) memcpy(ptr,impulses[ax].Get(),len*sizeof(WDL_FFT_REAL)); // duplicate channels
1045 else memset(ptr,0,len*sizeof(WDL_FFT_REAL));
1047 if (++ax==m_nch)ax=0;
1049 m_nch=usench;
1051 else if (usench<m_nch)
1053 m_nch=usench;
1054 int x;
1055 for(x=m_nch;x<WDL_CONVO_MAX_IMPULSE_NCH;x++) impulses[x].Resize(0,false);