WDL/convoengine.cpp

   1 /*
   2   WDL - convoengine.cpp
   3   Copyright (C) 2006 and later Cockos Incorporated
   4
   5   This software is provided 'as-is', without any express or implied
   6   warranty.  In no event will the authors be held liable for any damages
   7   arising from the use of this software.
   8
   9   Permission is granted to anyone to use this software for any purpose,
  10   including commercial applications, and to alter it and redistribute it
  11   freely, subject to the following restrictions:
  12
  13   1. The origin of this software must not be misrepresented; you must not
  14      claim that you wrote the original software. If you use this software
  15      in a product, an acknowledgment in the product documentation would be
  16      appreciated but is not required.
  17   2. Altered source versions must be plainly marked as such, and must not be
  18      misrepresented as being the original software.
  19   3. This notice may not be removed or altered from any source distribution.
  20
  21
  22 */
  23
  24 #ifdef _WIN32
  25 #include <windows.h>
  26 #endif
  27 #include <math.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <memory.h>
  31 #include "convoengine.h"
  32
  33 #include "denormal.h"
  34
  35 //#define TIMING
  36 #include "timing.c"
  37
  38 static void WDL_CONVO_CplxMul2(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
  39 {
  40   WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
  41   if (n<2 || (n&1)) return;
  42
  43   do {
  44     t1 = a[0].re * b[0].re;
  45     t2 = a[0].im * b[0].im;
  46     t3 = a[0].im * b[0].re;
  47     t4 = a[0].re * b[0].im;
  48     t5 = a[1].re * b[1].re;
  49     t6 = a[1].im * b[1].im;
  50     t7 = a[1].im * b[1].re;
  51     t8 = a[1].re * b[1].im;
  52     t1 -= t2;
  53     t3 += t4;
  54     t5 -= t6;
  55     t7 += t8;
  56     c[0].re = t1;
  57     c[1].re = t5;
  58     c[0].im = t3;
  59     c[1].im = t7;
  60     a += 2;
  61     b += 2;
  62     c += 2;
  63   } while (n -= 2);
  64 }
  65 static void WDL_CONVO_CplxMul3(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
  66 {
  67   WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
  68   if (n<2 || (n&1)) return;
  69
  70   do {
  71     t1 = a[0].re * b[0].re;
  72     t2 = a[0].im * b[0].im;
  73     t3 = a[0].im * b[0].re;
  74     t4 = a[0].re * b[0].im;
  75     t5 = a[1].re * b[1].re;
  76     t6 = a[1].im * b[1].im;
  77     t7 = a[1].im * b[1].re;
  78     t8 = a[1].re * b[1].im;
  79     t1 -= t2;
  80     t3 += t4;
  81     t5 -= t6;
  82     t7 += t8;
  83     c[0].re += t1;
  84     c[1].re += t5;
  85     c[0].im += t3;
  86     c[1].im += t7;
  87     a += 2;
  88     b += 2;
  89     c += 2;
  90   } while (n -= 2);
  91 }
  92
  93 static bool CompareQueueToBuf(WDL_FastQueue *q, const void *data, int len)
  94 {
  95   int offs=0;
  96   while (len>0)
  97   {
  98     void *td=NULL;
  99     int sz=q->GetPtr(offs,&td);
 100     if (sz<1) return true; // not enough data = not equal!
 101     if (sz>len) sz=len;
 102
 103     int i=sz/sizeof(WDL_FFT_REAL);
 104     WDL_FFT_REAL *a1=(WDL_FFT_REAL*)td;
 105     WDL_FFT_REAL *b1=(WDL_FFT_REAL*)data;
 106     while (i--)
 107     {
 108       if (fabs(*a1-*b1)>1.0e-7) return true;
 109       a1++;
 110       b1++;
 111     }
 112
 113     data = ((char *)data)+sz;
 114     offs+=sz;
 115     len-=sz;
 116   }
 117   return false;
 118 }
 119
 120
 121 WDL_ConvolutionEngine::WDL_ConvolutionEngine()
 122 {
 123   WDL_fft_init();
 124   m_impulse_nch=1;
 125   m_fft_size=0;
 126   m_impulse_len=0;
 127   m_proc_nch=0;
 128 }
 129
 130 WDL_ConvolutionEngine::~WDL_ConvolutionEngine()
 131 {
 132 }
 133
 134 int WDL_ConvolutionEngine::SetImpulse(WDL_ImpulseBuffer *impulse, int fft_size, int impulse_sample_offset, int max_imp_size, bool forceBrute)
 135 {
 136   int impulse_len=0;
 137   int x;
 138   int nch=impulse->GetNumChannels();
 139   for (x = 0; x < nch; x ++)
 140   {
 141     int l=impulse->impulses[x].GetSize()-impulse_sample_offset;
 142     if (max_imp_size && l>max_imp_size) l=max_imp_size;
 143     if (impulse_len < l) impulse_len=l;
 144   }
 145   m_impulse_nch=nch;
 146
 147   if (m_impulse_nch>1) // detect mono signals pretending to be multichannel
 148   {
 149     for (x = 1; x < m_impulse_nch; x ++)
 150     {
 151       if (impulse->impulses[x].GetSize()!=impulse->impulses[0].GetSize()||
 152           memcmp(impulse->impulses[x].Get(),impulse->impulses[0].Get(),
 153             impulse->impulses[0].GetSize()*sizeof(WDL_FFT_REAL)))
 154             break;
 155     }
 156     if (x >= m_impulse_nch) m_impulse_nch=1;
 157   }
 158
 159   m_impulse_len=impulse_len;
 160   m_proc_nch=-1;
 161
 162
 163   if (forceBrute)
 164   {
 165     m_fft_size=0;
 166
 167     // save impulse
 168     for (x = 0; x < m_impulse_nch; x ++)
 169     {
 170       WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
 171       int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
 172       if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
 173
 174       WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize(lenout)+lenout;
 175       while (lenout-->0) *--impout = (WDL_CONVO_IMPULSEBUFf) *imp++;
 176     }
 177
 178     for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 179     {
 180       m_samplesin[x].Clear();
 181       m_samplesin2[x].Clear();
 182       m_samplesout[x].Clear();
 183     }
 184
 185     return 0;
 186   }
 187
 188
 189   if (fft_size<=0)
 190   {
 191     int msz=fft_size<=-16? -fft_size*2 : 32768;
 192
 193     fft_size=32;
 194     while (fft_size < impulse_len*2 && fft_size < msz) fft_size*=2;
 195   }
 196
 197   m_fft_size=fft_size;
 198
 199   int impchunksize=fft_size/2;
 200   int nblocks=(impulse_len+impchunksize-1)/impchunksize;
 201   //char buf[512];
 202   //sprintf(buf,"il=%d, ffts=%d, cs=%d, nb=%d\n",impulse_len,fft_size,impchunksize,nblocks);
 203   //OutputDebugString(buf);
 204
 205   const bool smallerSizeMode=sizeof(WDL_CONVO_IMPULSEBUFf)!=sizeof(WDL_FFT_REAL);
 206
 207   WDL_FFT_REAL scale=(WDL_FFT_REAL) (1.0/fft_size);
 208   for (x = 0; x < m_impulse_nch; x ++)
 209   {
 210     WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
 211
 212     WDL_FFT_REAL *imp2=x < m_impulse_nch-1 ? impulse->impulses[x+1].Get()+impulse_sample_offset : NULL;
 213
 214     WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize((nblocks+!!smallerSizeMode)*fft_size*2);
 215     char *zbuf=m_impulse_zflag[x].Resize(nblocks);
 216     int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
 217     if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
 218
 219     int bl;
 220     for (bl = 0; bl < nblocks; bl ++)
 221     {
 222
 223       int thissz=lenout;
 224       if (thissz > impchunksize) thissz=impchunksize;
 225
 226       lenout -= thissz;
 227       int i=0;
 228       WDL_FFT_REAL mv=0.0;
 229       WDL_FFT_REAL mv2=0.0;
 230       WDL_FFT_REAL *imptmp = (WDL_FFT_REAL *)impout; //-V615
 231
 232       for (; i < thissz; i ++)
 233       {
 234         WDL_FFT_REAL v=*imp++;
 235         WDL_FFT_REAL v2=(WDL_FFT_REAL)fabs(v);
 236         if (v2 > mv) mv=v2;
 237
 238         imptmp[i*2]=denormal_filter_aggressive(v * scale);
 239
 240         if (imp2)
 241         {
 242           v=*imp2++;
 243           v2=(WDL_FFT_REAL)fabs(v);
 244           if (v2>mv2) mv2=v2;
 245           imptmp[i*2+1]=denormal_filter_aggressive(v*scale);
 246         }
 247         else imptmp[i*2+1]=0.0;
 248       }
 249       for (; i < fft_size; i ++)
 250       {
 251         imptmp[i*2]=0.0;
 252         imptmp[i*2+1]=0.0;
 253       }
 254       if (mv>1.0e-14||mv2>1.0e-14)
 255       {
 256         *zbuf++=mv>1.0e-14 ? 2 : 1; // 1 means only second channel has content
 257         WDL_fft((WDL_FFT_COMPLEX*)impout,fft_size,0);
 258
 259         if (smallerSizeMode)
 260         {
 261           int x,n=fft_size*2;
 262           for(x=0;x<n;x++) impout[x]=(WDL_CONVO_IMPULSEBUFf)imptmp[x];
 263         }
 264       }
 265       else *zbuf++=0;
 266
 267       impout+=fft_size*2;
 268     }
 269   }
 270   return m_fft_size/2;
 271 }
 272
 273
 274 void WDL_ConvolutionEngine::Reset() // clears out any latent samples
 275 {
 276   int x;
 277   memset(m_hist_pos,0,sizeof(m_hist_pos));
 278   for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 279   {
 280     m_samplesin[x].Clear();
 281     m_samplesin2[x].Clear();
 282     m_samplesout[x].Clear();
 283     memset(m_samplehist_zflag[x].Get(),0,m_samplehist_zflag[x].GetSize());
 284     memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
 285     memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
 286   }
 287 }
 288
 289 void WDL_ConvolutionEngine::Add(WDL_FFT_REAL **bufs, int len, int nch)
 290 {
 291   if (m_fft_size<1)
 292   {
 293     int ch;
 294     m_proc_nch=nch;
 295     for (ch = 0; ch < nch; ch ++)
 296     {
 297       int wch=ch;
 298       if (wch >=m_impulse_nch) wch-=m_impulse_nch;
 299       WDL_CONVO_IMPULSEBUFf *imp=m_impulse[wch].Get();
 300       int imp_len = m_impulse[wch].GetSize();
 301
 302
 303       if (imp_len>0)
 304       {
 305         if (m_samplesin2[ch].Available()<imp_len*(int)sizeof(WDL_FFT_REAL))
 306         {
 307           int sza=imp_len*sizeof(WDL_FFT_REAL)-m_samplesin2[ch].Available();
 308           memset(m_samplesin2[ch].Add(NULL,sza),0,sza);
 309         }
 310         WDL_FFT_REAL *psrc;
 311
 312         if (bufs && bufs[ch])
 313           psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
 314         else
 315         {
 316           psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
 317           memset(psrc,0,len*sizeof(WDL_FFT_REAL));
 318         }
 319
 320         WDL_FFT_REAL *pout=(WDL_FFT_REAL*)m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
 321         int x;
 322         int len1 = len&~1;
 323         for (x=0; x < len1 ; x += 2)
 324         {
 325           int i=imp_len;
 326           double sum=0.0,sum2=0.0;
 327           WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
 328           WDL_CONVO_IMPULSEBUFf *ip=imp;
 329           int j=i/4; i&=3;
 330           while (j--) // produce 2 samples, 4 impulse samples at a time
 331           {
 332             double a = ip[0],b=ip[1],aa=ip[2],bb=ip[3];
 333             double c = sp[1],d=sp[2],cc=sp[3];
 334             sum+=a * sp[0] + b * c + aa * d + bb * cc;
 335             sum2+=a * c + b * d + aa * cc + bb * sp[4];
 336             ip+=4;
 337             sp+=4;
 338           }
 339
 340           while (i--)
 341           {
 342             double a = *ip++;
 343             sum+=a * sp[0];
 344             sum2+=a * sp[1];
 345             sp++;
 346           }
 347           pout[x]=(WDL_FFT_REAL) sum;
 348           pout[x+1]=(WDL_FFT_REAL) sum2;
 349         }
 350         for(;x<len;x++) // any odd samples left
 351         {
 352           int i=imp_len;
 353           double sum=0.0;
 354           WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
 355           WDL_CONVO_IMPULSEBUFf *ip=imp;
 356           int j=i/4; i&=3;
 357           while (j--)
 358           {
 359             sum+=ip[0] * sp[0] + ip[1] * sp[1] + ip[2] * sp[2] + ip[3] * sp[3];
 360             ip+=4;
 361             sp+=4;
 362           }
 363
 364           while (i--) sum+=*ip++ * *sp++;
 365           pout[x]=(WDL_FFT_REAL) sum;
 366         }
 367         m_samplesin2[ch].Advance(len*sizeof(WDL_FFT_REAL));
 368         m_samplesin2[ch].Compact();
 369       }
 370       else
 371       {
 372         if (bufs && bufs[ch]) m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
 373         else
 374         {
 375           memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
 376         }
 377       }
 378
 379     }
 380     return;
 381   }
 382
 383
 384   int impchunksize=m_fft_size/2;
 385   int nblocks=(m_impulse_len+impchunksize-1)/impchunksize;
 386
 387   if (m_proc_nch != nch)
 388   {
 389     m_proc_nch=nch;
 390     memset(m_hist_pos,0,sizeof(m_hist_pos));
 391     int x;
 392     int mso=0;
 393     for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 394     {
 395       int so=m_samplesin[x].Available() + m_samplesout[x].Available();
 396       if (so>mso) mso=so;
 397
 398       if (x>=nch)
 399       {
 400         m_samplesin[x].Clear();
 401         m_samplesout[x].Clear();
 402       }
 403       else
 404       {
 405         if (m_impulse_len<1||!nblocks)
 406         {
 407           if (m_samplesin[x].Available())
 408           {
 409             int s=m_samplesin[x].Available();
 410             void *buf=m_samplesout[x].Add(NULL,s);
 411             m_samplesin[x].GetToBuf(0,buf,s);
 412             m_samplesin[x].Clear();
 413           }
 414         }
 415
 416         if (so < mso)
 417         {
 418           memset(m_samplesout[x].Add(NULL,mso-so),0,mso-so);
 419         }
 420       }
 421
 422       int sz=0;
 423       if (x<nch) sz=nblocks*m_fft_size;
 424
 425       memset(m_samplehist_zflag[x].Resize(nblocks),0,nblocks);
 426       m_samplehist[x].Resize(sz*2);
 427       m_overlaphist[x].Resize(x<nch ? m_fft_size/2 : 0);
 428       memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
 429       memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
 430     }
 431   }
 432
 433   int ch;
 434   if (m_impulse_len<1||!nblocks)
 435   {
 436     for (ch = 0; ch < nch; ch ++)
 437     {
 438       if (bufs && bufs[ch])
 439         m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
 440       else
 441         memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
 442     }
 443     // pass through
 444     return;
 445   }
 446
 447   for (ch = 0; ch < nch; ch ++)
 448   {
 449     if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
 450
 451     m_samplesin[ch].Add(bufs ? bufs[ch] : NULL,len*sizeof(WDL_FFT_REAL));
 452
 453   }
 454 }
 455
 456 void WDL_ConvolutionEngine::AddSilenceToOutput(int len, int nch)
 457 {
 458   int x;
 459   for(x=0;x<nch&&x<m_proc_nch;x++)
 460   {
 461     memset(m_samplesout[x].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
 462   }
 463 }
 464
 465 int WDL_ConvolutionEngine::Avail(int want)
 466 {
 467   if (m_fft_size<1)
 468   {
 469     return m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
 470   }
 471
 472   const int sz=m_fft_size/2;
 473   const int chunksize=m_fft_size/2;
 474   const int nblocks=(m_impulse_len+chunksize-1)/chunksize;
 475   // clear combining buffer
 476   WDL_FFT_REAL *workbuf2 = m_combinebuf.Resize(m_fft_size*4); // temp space
 477
 478   int ch;
 479
 480   for (ch = 0; ch < m_proc_nch; ch ++)
 481   {
 482     if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
 483     int srcc=ch;
 484     if (srcc>=m_impulse_nch) srcc=m_impulse_nch-1;
 485
 486     bool allow_mono_input_mode=true;
 487     bool mono_impulse_mode=false;
 488
 489     if (m_impulse_nch==1 && ch<m_proc_nch-1 &&
 490         m_samplehist[ch+1].GetSize()&&m_overlaphist[ch+1].GetSize() &&
 491         m_samplesin[ch].Available()==m_samplesin[ch+1].Available() &&
 492         m_samplesout[ch].Available()==m_samplesout[ch+1].Available()
 493         )
 494     { // 2x processing mode
 495       mono_impulse_mode=true;
 496       allow_mono_input_mode=false;
 497     }
 498
 499
 500     const int in_needed=sz;
 501
 502     // useSilentList[x] = 1 for mono signal, 2 for stereo, 0 for silent
 503     char *useSilentList=m_samplehist_zflag[ch].GetSize()==nblocks ? m_samplehist_zflag[ch].Get() : NULL;
 504     while (m_samplesin[ch].Available()/(int)sizeof(WDL_FFT_REAL) >= sz &&
 505            m_samplesout[ch].Available() < want*(int)sizeof(WDL_FFT_REAL))
 506     {
 507       int histpos;
 508       if ((histpos=++m_hist_pos[ch]) >= nblocks) histpos=m_hist_pos[ch]=0;
 509
 510       // get samples from input, to history
 511       WDL_FFT_REAL *optr = m_samplehist[ch].Get()+histpos*m_fft_size*2;
 512
 513       m_samplesin[ch].GetToBuf(0,optr+sz,in_needed*sizeof(WDL_FFT_REAL));
 514       m_samplesin[ch].Advance(in_needed*sizeof(WDL_FFT_REAL));
 515
 516
 517       bool mono_input_mode=false;
 518
 519       bool nonzflag=false;
 520       if (mono_impulse_mode)
 521       {
 522         if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
 523         m_samplesin[ch+1].GetToBuf(0,workbuf2,sz*sizeof(WDL_FFT_REAL));
 524         m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
 525         int i;
 526         for (i = 0; i < sz; i ++) // unpack samples
 527         {
 528           WDL_FFT_REAL f = optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
 529           if (!nonzflag && (f<-1.0e-6 || f>1.0e-6)) nonzflag=true;
 530           f=optr[i*2+1]=denormal_filter_aggressive(workbuf2[i]);
 531           if (!nonzflag && (f<-1.0e-6 || f>1.0e-6)) nonzflag=true;
 532         }
 533       }
 534       else
 535       {
 536         if (allow_mono_input_mode &&
 537           ch < m_proc_nch-1 &&
 538           srcc<m_impulse_nch-1 &&
 539           !CompareQueueToBuf(&m_samplesin[ch+1],optr+sz,sz*sizeof(WDL_FFT_REAL))
 540           )
 541         {
 542           mono_input_mode=true;
 543         }
 544         else
 545         {
 546           allow_mono_input_mode=false;
 547         }
 548
 549         int i;
 550         for (i = 0; i < sz; i ++) // unpack samples
 551         {
 552           WDL_FFT_REAL f=optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
 553           optr[i*2+1]=0.0;
 554           if (!nonzflag && (f<-1.0e-6 || f>1.0e-6)) nonzflag=true;
 555         }
 556       }
 557
 558       int i;
 559       for (i = 1; mono_input_mode && i < nblocks; i ++) // start @ 1, since hist[histpos] is no longer used for here
 560       {
 561         int srchistpos = histpos-i;
 562         if (srchistpos < 0) srchistpos += nblocks;
 563         if (useSilentList[srchistpos]==2) mono_input_mode=false;
 564       }
 565
 566       if (nonzflag||!useSilentList) memset(optr+sz*2,0,sz*2*sizeof(WDL_FFT_REAL));
 567
 568
 569 #ifdef WDLCONVO_ZL_ACCOUNTING
 570       m_zl_fftcnt++;
 571 #endif
 572
 573       if (nonzflag) WDL_fft((WDL_FFT_COMPLEX*)optr,m_fft_size,0);
 574
 575       if (useSilentList) useSilentList[histpos]=nonzflag ? (mono_input_mode ? 1 : 2) : 0;
 576
 577       int mzfl=2;
 578       if (mono_input_mode)
 579       {
 580         mzfl=1;
 581
 582         m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
 583
 584         // save a valid copy in sample hist incase we switch from mono to stereo
 585         if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
 586         WDL_FFT_REAL *optr2 = m_samplehist[ch+1].Get()+m_hist_pos[ch+1]*m_fft_size*2;
 587         memcpy(optr2,optr,m_fft_size*2*sizeof(WDL_FFT_REAL));
 588       }
 589
 590       int applycnt=0;
 591       char *useImpSilentList=m_impulse_zflag[srcc].GetSize() == nblocks ? m_impulse_zflag[srcc].Get() : NULL;
 592
 593       WDL_CONVO_IMPULSEBUFf *impulseptr=m_impulse[srcc].Get();
 594       for (i = 0; i < nblocks; i ++, impulseptr+=m_fft_size*2)
 595       {
 596         int srchistpos = histpos-i;
 597         if (srchistpos < 0) srchistpos += nblocks;
 598
 599         if (useImpSilentList && useImpSilentList[i]<mzfl) continue;
 600         if (useSilentList && !useSilentList[srchistpos]) continue; // silent block
 601
 602         WDL_FFT_REAL *samplehist=m_samplehist[ch].Get() + m_fft_size*srchistpos*2;
 603
 604         if (applycnt++) // add to output
 605           WDL_CONVO_CplxMul3((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
 606         else // replace output
 607           WDL_CONVO_CplxMul2((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
 608
 609       }
 610       if (!applycnt)
 611         memset(workbuf2,0,m_fft_size*2*sizeof(WDL_FFT_REAL));
 612       else
 613         WDL_fft((WDL_FFT_COMPLEX*)workbuf2,m_fft_size,1);
 614
 615       WDL_FFT_REAL *olhist=m_overlaphist[ch].Get(); // errors from last time
 616       WDL_FFT_REAL *p1=workbuf2,*p3=workbuf2+m_fft_size,*p1o=workbuf2;
 617
 618       if (mono_impulse_mode||mono_input_mode)
 619       {
 620         WDL_FFT_REAL *p2o=workbuf2+m_fft_size*2;
 621         WDL_FFT_REAL *olhist2=m_overlaphist[ch+1].Get(); // errors from last time
 622         int s=sz/2;
 623         while (s--)
 624         {
 625           p2o[0] = p1[1]+olhist2[0];
 626           p2o[1] = p1[3]+olhist2[1];
 627           p1o[0] = p1[0]+olhist[0];
 628           p1o[1] = p1[2]+olhist[1];
 629           p1o+=2;
 630           p2o+=2;
 631           p1+=4;
 632
 633           olhist[0]=p3[0];
 634           olhist[1]=p3[2];
 635           olhist2[0]=p3[1];
 636           olhist2[1]=p3[3];
 637           p3+=4;
 638
 639           olhist+=2;
 640           olhist2+=2;
 641         }
 642         // add samples to output
 643         m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
 644         m_samplesout[ch+1].Add(workbuf2+m_fft_size*2,sz*sizeof(WDL_FFT_REAL));
 645       }
 646       else
 647       {
 648         int s=sz/2;
 649         while (s--)
 650         {
 651           p1o[0] = p1[0]+olhist[0];
 652           p1o[1] = p1[2]+olhist[1];
 653           p1o+=2;
 654           p1+=4;
 655
 656           olhist[0]=p3[0];
 657           olhist[1]=p3[2];
 658           p3+=4;
 659
 660           olhist+=2;
 661         }
 662         // add samples to output
 663         m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
 664       }
 665     } // while available
 666
 667     if (mono_impulse_mode) ch++;
 668   }
 669
 670   int mv = want;
 671   for (ch=0;ch<m_proc_nch;ch++)
 672   {
 673     int v = m_samplesout[ch].Available()/sizeof(WDL_FFT_REAL);
 674     if (!ch || v<mv)mv=v;
 675   }
 676   return mv;
 677 }
 678
 679 WDL_FFT_REAL **WDL_ConvolutionEngine::Get()
 680 {
 681   int x;
 682   for (x = 0; x < m_proc_nch; x ++)
 683   {
 684     m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
 685   }
 686   return m_get_tmpptrs;
 687 }
 688
 689 void WDL_ConvolutionEngine::Advance(int len)
 690 {
 691   int x;
 692   for (x = 0; x < m_proc_nch; x ++)
 693   {
 694     m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
 695     m_samplesout[x].Compact();
 696   }
 697 }
 698
 699
 700
 701 /****************************************************************
 702 **  low latency version
 703 */
 704
 705 WDL_ConvolutionEngine_Div::WDL_ConvolutionEngine_Div()
 706 {
 707   timingInit();
 708   m_proc_nch=2;
 709   m_need_feedsilence=true;
 710 }
 711
 712 int WDL_ConvolutionEngine_Div::SetImpulse(WDL_ImpulseBuffer *impulse, int maxfft_size, int known_blocksize, int max_imp_size, int impulse_offset, int latency_allowed)
 713 {
 714   m_need_feedsilence=true;
 715
 716   m_engines.Empty(true);
 717   if (maxfft_size<0)maxfft_size=-maxfft_size;
 718   maxfft_size*=2;
 719   if (!maxfft_size || maxfft_size>32768) maxfft_size=32768;
 720
 721
 722   const int MAX_SIZE_FOR_BRUTE=64;
 723
 724   int fftsize = MAX_SIZE_FOR_BRUTE;
 725   int impulsechunksize = MAX_SIZE_FOR_BRUTE;
 726
 727   if (known_blocksize && !(known_blocksize&(known_blocksize-1)) && known_blocksize>MAX_SIZE_FOR_BRUTE*2)
 728   {
 729     fftsize=known_blocksize/2;
 730     impulsechunksize=known_blocksize/2;
 731   }
 732   if (latency_allowed*2 > fftsize)
 733   {
 734     int x = 16;
 735     while (x <= latency_allowed) x*=2;
 736     if (x>32768) x=32768;
 737     fftsize=impulsechunksize=x;
 738   }
 739
 740   int offs=0;
 741   int samplesleft=impulse->impulses[0].GetSize()-impulse_offset;
 742   if (max_imp_size>0 && samplesleft>max_imp_size) samplesleft=max_imp_size;
 743
 744   do
 745   {
 746     WDL_ConvolutionEngine *eng=new WDL_ConvolutionEngine;
 747
 748     bool wantBrute = !latency_allowed && !offs;
 749     if (impulsechunksize*(wantBrute ? 2 : 3) >= samplesleft) impulsechunksize=samplesleft; // early-out, no point going to a larger FFT (since if we did this, we wouldnt have enough samples for a complete next pass)
 750     if (fftsize>=maxfft_size) { impulsechunksize=samplesleft; fftsize=maxfft_size; } // if FFTs are as large as possible, finish up
 751
 752     eng->SetImpulse(impulse,fftsize,offs+impulse_offset,impulsechunksize, wantBrute);
 753     eng->m_zl_delaypos = offs;
 754     eng->m_zl_dumpage=0;
 755     m_engines.Add(eng);
 756
 757 #ifdef WDLCONVO_ZL_ACCOUNTING
 758     char buf[512];
 759     wsprintf(buf,"ce%d: offs=%d, len=%d, fftsize=%d\n",m_engines.GetSize(),offs,impulsechunksize,fftsize);
 760     OutputDebugString(buf);
 761 #endif
 762
 763     samplesleft -= impulsechunksize;
 764     offs+=impulsechunksize;
 765
 766 #if 1 // this seems about 10% faster (maybe due to better cache use from less sized ffts used?)
 767     impulsechunksize=offs*3;
 768     fftsize=offs*2;
 769 #else
 770     impulsechunksize=fftsize;
 771
 772     fftsize*=2;
 773 #endif
 774   }
 775   while (samplesleft > 0);
 776
 777   return GetLatency();
 778 }
 779
 780 int WDL_ConvolutionEngine_Div::GetLatency()
 781 {
 782   return m_engines.GetSize() ? m_engines.Get(0)->GetLatency() : 0;
 783 }
 784
 785
 786 void WDL_ConvolutionEngine_Div::Reset()
 787 {
 788   int x;
 789   for (x = 0; x < m_engines.GetSize(); x ++)
 790   {
 791     WDL_ConvolutionEngine *eng=m_engines.Get(x);
 792     eng->Reset();
 793   }
 794   for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 795   {
 796     m_samplesout[x].Clear();
 797   }
 798
 799   m_need_feedsilence=true;
 800 }
 801
 802 WDL_ConvolutionEngine_Div::~WDL_ConvolutionEngine_Div()
 803 {
 804   timingPrint();
 805   m_engines.Empty(true);
 806 }
 807
 808 void WDL_ConvolutionEngine_Div::Add(WDL_FFT_REAL **bufs, int len, int nch)
 809 {
 810   m_proc_nch=nch;
 811
 812   bool ns=m_need_feedsilence;
 813   m_need_feedsilence=false;
 814
 815   int x;
 816   for (x = 0; x < m_engines.GetSize(); x ++)
 817   {
 818     WDL_ConvolutionEngine *eng=m_engines.Get(x);
 819     if (ns)
 820     {
 821       eng->m_zl_dumpage = (x>0 && x < m_engines.GetSize()-1) ? (eng->GetLatency()/4) : 0; // reduce max number of ffts per block by staggering them
 822
 823       if (eng->m_zl_dumpage>0)
 824         eng->Add(NULL,eng->m_zl_dumpage,nch); // added silence to input (to control when fft happens)
 825     }
 826
 827     eng->Add(bufs,len,nch);
 828
 829     if (ns) eng->AddSilenceToOutput(eng->m_zl_delaypos,nch); // add silence to output (to delay output to its correct time)
 830
 831   }
 832 }
 833 WDL_FFT_REAL **WDL_ConvolutionEngine_Div::Get()
 834 {
 835   int x;
 836   for (x = 0; x < m_proc_nch; x ++)
 837   {
 838     m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
 839   }
 840   return m_get_tmpptrs;
 841 }
 842
 843 void WDL_ConvolutionEngine_Div::Advance(int len)
 844 {
 845   int x;
 846   for (x = 0; x < m_proc_nch; x ++)
 847   {
 848     m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
 849     m_samplesout[x].Compact();
 850   }
 851 }
 852
 853 int WDL_ConvolutionEngine_Div::Avail(int wantSamples)
 854 {
 855   timingEnter(1);
 856   int wso=wantSamples;
 857   int x;
 858 #ifdef WDLCONVO_ZL_ACCOUNTING
 859   int cnt=0;
 860   static int maxcnt=-1;
 861   int h=0;
 862 #endif
 863   for (x = 0; x < m_engines.GetSize(); x ++)
 864   {
 865     WDL_ConvolutionEngine *eng=m_engines.Get(x);
 866 #ifdef WDLCONVO_ZL_ACCOUNTING
 867     eng->m_zl_fftcnt=0;
 868 #endif
 869     int a=eng->Avail(wso+eng->m_zl_dumpage) - eng->m_zl_dumpage;
 870 #ifdef WDLCONVO_ZL_ACCOUNTING
 871     cnt += !!eng->m_zl_fftcnt;
 872
 873 #if 0
 874     if (eng->m_zl_fftcnt)
 875       h|=1<<x;
 876
 877     if (eng->m_zl_fftcnt && x==m_engines.GetSize()-1 && cnt>1)
 878     {
 879       char buf[512];
 880       wsprintf(buf,"fft flags=%08x (%08x=max)\n",h,1<<x);
 881       OutputDebugString(buf);
 882     }
 883 #endif
 884 #endif
 885     if (a < wantSamples) wantSamples=a;
 886   }
 887
 888 #ifdef WDLCONVO_ZL_ACCOUNTING
 889   static DWORD lastt=0;
 890   if (cnt>maxcnt)maxcnt=cnt;
 891   if (GetTickCount()>lastt+1000)
 892   {
 893     lastt=GetTickCount();
 894     char buf[512];
 895     wsprintf(buf,"maxcnt=%d\n",maxcnt);
 896     OutputDebugString(buf);
 897     maxcnt=-1;
 898   }
 899 #endif
 900   if (wantSamples>0)
 901   {
 902     WDL_FFT_REAL *tp[WDL_CONVO_MAX_PROC_NCH];
 903     for (x =0; x < m_proc_nch; x ++)
 904     {
 905       memset(tp[x]=(WDL_FFT_REAL*)m_samplesout[x].Add(NULL,wantSamples*sizeof(WDL_FFT_REAL)),0,wantSamples*sizeof(WDL_FFT_REAL));
 906     }
 907
 908     for (x = 0; x < m_engines.GetSize(); x ++)
 909     {
 910       WDL_ConvolutionEngine *eng=m_engines.Get(x);
 911       if (eng->m_zl_dumpage>0) { eng->Advance(eng->m_zl_dumpage); eng->m_zl_dumpage=0; }
 912
 913       WDL_FFT_REAL **p=eng->Get();
 914       if (p)
 915       {
 916         int i;
 917         for (i =0; i < m_proc_nch; i ++)
 918         {
 919           WDL_FFT_REAL *o=tp[i];
 920           WDL_FFT_REAL *in=p[i];
 921           int j=wantSamples;
 922           while (j-->0) *o++ += *in++;
 923         }
 924       }
 925       eng->Advance(wantSamples);
 926     }
 927   }
 928   timingLeave(1);
 929
 930   int av=m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
 931   return av>wso ? wso : av;
 932 }
 933
 934
 935 #ifdef WDL_TEST_CONVO
 936
 937 #include <stdio.h>
 938
 939 int main(int argc, char **argv)
 940 {
 941   if (argc!=5)
 942   {
 943     printf("usage: convoengine fftsize implen oneoffs pingoffs\n");
 944     return -1;
 945   }
 946
 947   int fftsize=atoi(argv[1]);
 948   int implen = atoi(argv[2]);
 949   int oneoffs = atoi(argv[3]);
 950   int pingoffs=atoi(argv[4]);
 951
 952   if (implen < 1 || oneoffs < 0 || oneoffs >= implen || pingoffs < 0)
 953   {
 954     printf("invalid parameters\n");
 955     return -1;
 956   }
 957
 958   WDL_ImpulseBuffer imp;
 959   imp.nch=1;
 960   memset(imp.impulses[0].Resize(implen),0,implen*sizeof(WDL_FFT_REAL));
 961   imp.impulses[0].Get()[oneoffs]=1.0;
 962
 963
 964 #if WDL_TEST_CONVO==2
 965   WDL_ConvolutionEngine_Div engine;
 966 #else
 967   WDL_ConvolutionEngine engine;
 968 #endif
 969   engine.SetImpulse(&imp,fftsize);
 970   WDL_TypedBuf<WDL_FFT_REAL> m_tmpbuf;
 971   memset(m_tmpbuf.Resize(pingoffs+1),0,pingoffs*sizeof(WDL_FFT_REAL));
 972   m_tmpbuf.Get()[pingoffs]=1.0;
 973   WDL_FFT_REAL *p=m_tmpbuf.Get();
 974   engine.Add(&p,pingoffs+1,1);
 975
 976   p=m_tmpbuf.Resize(4096);
 977   memset(p,0,m_tmpbuf.GetSize()*sizeof(WDL_FFT_REAL));
 978
 979   int avail;
 980   while ((avail=engine.Avail(pingoffs+oneoffs + 8192)) < pingoffs+oneoffs + 8192)
 981   {
 982     engine.Add(&p,4096,1);
 983   }
 984   WDL_FFT_REAL **output = engine.Get();
 985   if (!output || !*output)
 986   {
 987     printf("cant get output\n");
 988     return -1;
 989   }
 990   int x;
 991   for (x = 0; x < avail; x ++)
 992   {
 993     WDL_FFT_REAL val=output[0][x];
 994     WDL_FFT_REAL expval = (x==pingoffs+oneoffs) ? 1.0:0.0;
 995     if (fabs(val-expval)>0.000000001)
 996     {
 997       printf("%d: %.4fdB - %f %f\n",x,log10(max(val,0.000000000001))*20.0 - log10(max(expval,0.000000000001))*20.0,val,expval);
 998     }
 999   }
1000
1001   return 0;
1002 }
1003
1004 #endif
1005
1006
1007 int WDL_ImpulseBuffer::SetLength(int samples)
1008 {
1009   int x;
1010   for(x=0;x<m_nch;x++)
1011   {
1012     int cursz=impulses[x].GetSize();
1013     if (cursz!=samples)
1014     {
1015       impulses[x].Resize(samples,false);
1016
1017       if (impulses[x].GetSize()!=samples) // validate length!
1018       {
1019         // ERROR! FREE ALL!
1020         for(x=0;x<m_nch;x++) impulses[x].Resize(0);
1021         return 0;
1022       }
1023     }
1024
1025     if (cursz<samples)
1026       memset(impulses[x].Get()+cursz,0,(samples-cursz)*sizeof(WDL_FFT_REAL));
1027   }
1028   return impulses[0].GetSize();
1029 }
1030
1031
1032 void WDL_ImpulseBuffer::SetNumChannels(int usench)
1033 {
1034   if (usench<1) usench=1;
1035   else if (usench>WDL_CONVO_MAX_IMPULSE_NCH) usench=WDL_CONVO_MAX_IMPULSE_NCH;
1036
1037   if (usench > m_nch)
1038   {
1039     int len = GetLength();
1040     int x,ax=0;
1041     for(x=m_nch;x<usench;x++)
1042     {
1043       WDL_FFT_REAL *ptr=impulses[x].Resize(len,false);
1044       if (ax<x) memcpy(ptr,impulses[ax].Get(),len*sizeof(WDL_FFT_REAL)); // duplicate channels
1045       else memset(ptr,0,len*sizeof(WDL_FFT_REAL));
1046
1047       if (++ax==m_nch)ax=0;
1048     }
1049     m_nch=usench;
1050   }
1051   else if (usench<m_nch)
1052   {
1053     m_nch=usench;
1054     int x;
1055     for(x=m_nch;x<WDL_CONVO_MAX_IMPULSE_NCH;x++) impulses[x].Resize(0,false);
1056   }
1057 }