WDL/convoengine.cpp

   1 /*
   2   WDL - convoengine.cpp
   3   Copyright (C) 2006 and later Cockos Incorporated
   4
   5   This software is provided 'as-is', without any express or implied
   6   warranty.  In no event will the authors be held liable for any damages
   7   arising from the use of this software.
   8
   9   Permission is granted to anyone to use this software for any purpose,
  10   including commercial applications, and to alter it and redistribute it
  11   freely, subject to the following restrictions:
  12
  13   1. The origin of this software must not be misrepresented; you must not
  14      claim that you wrote the original software. If you use this software
  15      in a product, an acknowledgment in the product documentation would be
  16      appreciated but is not required.
  17   2. Altered source versions must be plainly marked as such, and must not be
  18      misrepresented as being the original software.
  19   3. This notice may not be removed or altered from any source distribution.
  20
  21
  22 */
  23
  24 #ifdef _WIN32
  25 #include <windows.h>
  26 #endif
  27 #include <math.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <memory.h>
  31 #include "convoengine.h"
  32
  33 #include "denormal.h"
  34
  35 //#define TIMING
  36 #include "timing.c"
  37
  // Absolute sample magnitude at or below which input audio is treated as silent
  // (used when comparing queued input and when flagging input history blocks).
  38 #define CONVOENGINE_SILENCE_THRESH 1.0e-12 // -240dB
  // Absolute sample magnitude at or below which an impulse block is treated as silent
  // (used in SetImpulse() to build the per-block impulse zero flags).
  39 #define CONVOENGINE_IMPULSE_SILENCE_THRESH 1.0e-15 // -300dB
  40
  41 static void WDL_CONVO_CplxMul2(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
  42 {
  43   WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
  44   if (n<2 || (n&1)) return;
  45
  46   do {
  47     t1 = a[0].re * b[0].re;
  48     t2 = a[0].im * b[0].im;
  49     t3 = a[0].im * b[0].re;
  50     t4 = a[0].re * b[0].im;
  51     t5 = a[1].re * b[1].re;
  52     t6 = a[1].im * b[1].im;
  53     t7 = a[1].im * b[1].re;
  54     t8 = a[1].re * b[1].im;
  55     t1 -= t2;
  56     t3 += t4;
  57     t5 -= t6;
  58     t7 += t8;
  59     c[0].re = t1;
  60     c[1].re = t5;
  61     c[0].im = t3;
  62     c[1].im = t7;
  63     a += 2;
  64     b += 2;
  65     c += 2;
  66   } while (n -= 2);
  67 }
  68 static void WDL_CONVO_CplxMul3(WDL_FFT_COMPLEX *c, WDL_FFT_COMPLEX *a, WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
  69 {
  70   WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
  71   if (n<2 || (n&1)) return;
  72
  73   do {
  74     t1 = a[0].re * b[0].re;
  75     t2 = a[0].im * b[0].im;
  76     t3 = a[0].im * b[0].re;
  77     t4 = a[0].re * b[0].im;
  78     t5 = a[1].re * b[1].re;
  79     t6 = a[1].im * b[1].im;
  80     t7 = a[1].im * b[1].re;
  81     t8 = a[1].re * b[1].im;
  82     t1 -= t2;
  83     t3 += t4;
  84     t5 -= t6;
  85     t7 += t8;
  86     c[0].re += t1;
  87     c[1].re += t5;
  88     c[0].im += t3;
  89     c[1].im += t7;
  90     a += 2;
  91     b += 2;
  92     c += 2;
  93   } while (n -= 2);
  94 }
  95
  96 static bool CompareQueueToBuf(WDL_FastQueue *q, const void *data, int len)
  97 {
  98   int offs=0;
  99   while (len>0)
 100   {
 101     void *td=NULL;
 102     int sz=q->GetPtr(offs,&td);
 103     if (sz<1) return true; // not enough data = not equal!
 104     if (sz>len) sz=len;
 105
 106     int i=sz/sizeof(WDL_FFT_REAL);
 107     WDL_FFT_REAL *a1=(WDL_FFT_REAL*)td;
 108     WDL_FFT_REAL *b1=(WDL_FFT_REAL*)data;
 109     while (i--)
 110     {
 111       if (fabs(*a1-*b1)>CONVOENGINE_SILENCE_THRESH) return true;
 112       a1++;
 113       b1++;
 114     }
 115
 116     data = ((char *)data)+sz;
 117     offs+=sz;
 118     len-=sz;
 119   }
 120   return false;
 121 }
 122
 123
 124 WDL_ConvolutionEngine::WDL_ConvolutionEngine()
 125 {
 126   WDL_fft_init();
 127   m_impulse_nch=1;
 128   m_fft_size=0;
 129   m_impulse_len=0;
 130   m_proc_nch=0;
 131 }
 132
 // Destructor. The body is empty: no explicit cleanup is performed here.
 133 WDL_ConvolutionEngine::~WDL_ConvolutionEngine()
 134 {
 135 }
 136
 // Loads an impulse response into the engine.
 //   impulse               - source impulse; the channel count comes from GetNumChannels().
 //   fft_size              - FFT size to use. When <=0 a size is chosen automatically: start at 32
 //                           and double until it reaches 2x the impulse length or a cap
 //                           (-fft_size*2 when fft_size<=-16, otherwise 32768).
 //   impulse_sample_offset - index of the first impulse sample to use.
 //   max_imp_size          - when nonzero, the maximum number of samples used per channel.
 //   forceBrute            - when true, stores the impulse time-reversed for direct (non-FFT)
 //                           convolution and sets m_fft_size to 0.
 // Returns 0 in brute mode, otherwise m_fft_size/2.
 // Also sets m_proc_nch to -1, which makes the next FFT-mode Add() rebuild its history buffers.
 137 int WDL_ConvolutionEngine::SetImpulse(WDL_ImpulseBuffer *impulse, int fft_size, int impulse_sample_offset, int max_imp_size, bool forceBrute)
 138 {
       // impulse_len = longest usable channel length (after offset and max_imp_size clamp)
 139   int impulse_len=0;
 140   int x;
 141   int nch=impulse->GetNumChannels();
 142   for (x = 0; x < nch; x ++)
 143   {
 144     int l=impulse->impulses[x].GetSize()-impulse_sample_offset;
 145     if (max_imp_size && l>max_imp_size) l=max_imp_size;
 146     if (impulse_len < l) impulse_len=l;
 147   }
 148   m_impulse_nch=nch;
 149
 150   if (m_impulse_nch>1) // detect mono signals pretending to be multichannel
 151   {
         // the loop breaks at the first channel that differs from channel 0 in size or content
 152     for (x = 1; x < m_impulse_nch; x ++)
 153     {
 154       if (impulse->impulses[x].GetSize()!=impulse->impulses[0].GetSize()||
 155           memcmp(impulse->impulses[x].Get(),impulse->impulses[0].Get(),
 156             impulse->impulses[0].GetSize()*sizeof(WDL_FFT_REAL)))
 157             break;
 158     }
         // no break happened: every channel matches channel 0, so treat the impulse as mono
 159     if (x >= m_impulse_nch) m_impulse_nch=1;
 160   }
 161
 162   m_impulse_len=impulse_len;
 163   m_proc_nch=-1;
 164
 165
 166   if (forceBrute)
 167   {
 168     m_fft_size=0;
 169
 170     // save impulse
 171     for (x = 0; x < m_impulse_nch; x ++)
 172     {
 173       WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
 174       int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
 175       if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
 176
           // impout starts one past the end and is written backwards: the stored impulse is time-reversed
           // NOTE(review): lenout is negative if impulse_sample_offset exceeds the channel size -- confirm callers never do this
 177       WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize(lenout)+lenout;
 178       while (lenout-->0) *--impout = (WDL_CONVO_IMPULSEBUFf) *imp++;
 179     }
 180
 181     for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 182     {
 183       m_samplesin[x].Clear();
 184       m_samplesin2[x].Clear();
 185       m_samplesout[x].Clear();
 186     }
 187
 188     return 0;
 189   }
 190
 191
 192   if (fft_size<=0)
 193   {
         // automatic FFT size selection (see the header comment)
 194     int msz=fft_size<=-16? -fft_size*2 : 32768;
 195
 196     fft_size=32;
 197     while (fft_size < impulse_len*2 && fft_size < msz) fft_size*=2;
 198   }
 199
 200   m_fft_size=fft_size;
 201
       // the impulse is split into blocks of fft_size/2 samples; nblocks rounds up
 202   int impchunksize=fft_size/2;
 203   int nblocks=(impulse_len+impchunksize-1)/impchunksize;
 204   //char buf[512];
 205   //sprintf(buf,"il=%d, ffts=%d, cs=%d, nb=%d\n",impulse_len,fft_size,impchunksize,nblocks);
 206   //OutputDebugString(buf);
 207
       // true when impulse storage uses a different sample type size than the FFT works in
 208   const bool smallerSizeMode=sizeof(WDL_CONVO_IMPULSEBUFf)!=sizeof(WDL_FFT_REAL);
 209
       // the 1/fft_size scale is applied to the impulse samples before the forward FFT
 210   WDL_FFT_REAL scale=(WDL_FFT_REAL) (1.0/fft_size);
 211   for (x = 0; x < m_impulse_nch; x ++)
 212   {
 213     WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
 214
         // imp2: the NEXT channel's samples (NULL for the last channel); written to the odd (imaginary) slots below
 215     WDL_FFT_REAL *imp2=x < m_impulse_nch-1 ? impulse->impulses[x+1].Get()+impulse_sample_offset : NULL;
 216
         // in smallerSizeMode one extra block is allocated (each block is first filled and transformed as WDL_FFT_REAL via imptmp)
 217     WDL_CONVO_IMPULSEBUFf *impout=m_impulse[x].Resize((nblocks+!!smallerSizeMode)*fft_size*2);
 218     char *zbuf=m_impulse_zflag[x].Resize(nblocks);
 219     int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
 220     if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
 221
 222     int bl;
 223     for (bl = 0; bl < nblocks; bl ++)
 224     {
 225
 226       int thissz=lenout;
 227       if (thissz > impchunksize) thissz=impchunksize;
 228
 229       lenout -= thissz;
 230       int i=0;
           // mv / mv2: peak absolute value of this block for channel x / channel x+1
 231       WDL_FFT_REAL mv=0.0;
 232       WDL_FFT_REAL mv2=0.0;
 233       WDL_FFT_REAL *imptmp = (WDL_FFT_REAL *)impout; //-V615
 234
 235       for (; i < thissz; i ++)
 236       {
 237         WDL_FFT_REAL v=*imp++;
 238         WDL_FFT_REAL v2=(WDL_FFT_REAL)fabs(v);
 239         if (v2 > mv) mv=v2;
 240
 241         imptmp[i*2]=denormal_filter_aggressive(v * scale);
 242
 243         if (imp2)
 244         {
               // NOTE(review): thissz is derived from channel x's length only; this read assumes
               // channel x+1 holds at least as many samples -- confirm all channels share a length
 245           v=*imp2++;
 246           v2=(WDL_FFT_REAL)fabs(v);
 247           if (v2>mv2) mv2=v2;
 248           imptmp[i*2+1]=denormal_filter_aggressive(v*scale);
 249         }
 250         else imptmp[i*2+1]=0.0;
 251       }
           // zero the remainder of the block (fft_size complex values in total)
 252       for (; i < fft_size; i ++)
 253       {
 254         imptmp[i*2]=0.0;
 255         imptmp[i*2+1]=0.0;
 256       }
 257       if (mv>CONVOENGINE_IMPULSE_SILENCE_THRESH||mv2>CONVOENGINE_IMPULSE_SILENCE_THRESH)
 258       {
 259         *zbuf++=mv>CONVOENGINE_IMPULSE_SILENCE_THRESH ? 2 : 1; // 1 means only second channel has content
 260         WDL_fft((WDL_FFT_COMPLEX*)impout,fft_size,0);
 261
 262         if (smallerSizeMode)
 263         {
               // convert the transformed block in place to the impulse storage type, front to back
               // (this x shadows the outer channel index only inside this scope)
 264           int x,n=fft_size*2;
 265           for(x=0;x<n;x++) impout[x]=(WDL_CONVO_IMPULSEBUFf)imptmp[x];
 266         }
 267       }
           // flag 0: both peaks are at or below the threshold, so the block is neither transformed nor converted
 268       else *zbuf++=0;
 269
 270       impout+=fft_size*2;
 271     }
 272   }
 273   return m_fft_size/2;
 274 }
 275
 276
 277 void WDL_ConvolutionEngine::Reset() // clears out any latent samples
 278 {
 279   int x;
 280   memset(m_hist_pos,0,sizeof(m_hist_pos));
 281   for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 282   {
 283     m_samplesin[x].Clear();
 284     m_samplesin2[x].Clear();
 285     m_samplesout[x].Clear();
 286     memset(m_samplehist_zflag[x].Get(),0,m_samplehist_zflag[x].GetSize());
 287     memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
 288     memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
 289   }
 290 }
 291
 292 void WDL_ConvolutionEngine::Add(WDL_FFT_REAL **bufs, int len, int nch)
 293 {
 294   if (m_fft_size<1)
 295   {
 296     int ch;
 297     m_proc_nch=nch;
 298     for (ch = 0; ch < nch; ch ++)
 299     {
 300       int wch=ch;
 301       if (wch >=m_impulse_nch) wch-=m_impulse_nch;
 302       WDL_CONVO_IMPULSEBUFf *imp=m_impulse[wch].Get();
 303       int imp_len = m_impulse[wch].GetSize();
 304
 305
 306       if (imp_len>0)
 307       {
 308         if (m_samplesin2[ch].Available()<imp_len*(int)sizeof(WDL_FFT_REAL))
 309         {
 310           int sza=imp_len*sizeof(WDL_FFT_REAL)-m_samplesin2[ch].Available();
 311           memset(m_samplesin2[ch].Add(NULL,sza),0,sza);
 312         }
 313         WDL_FFT_REAL *psrc;
 314
 315         if (bufs && bufs[ch])
 316           psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
 317         else
 318         {
 319           psrc=(WDL_FFT_REAL*)m_samplesin2[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
 320           memset(psrc,0,len*sizeof(WDL_FFT_REAL));
 321         }
 322
 323         WDL_FFT_REAL *pout=(WDL_FFT_REAL*)m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL));
 324         int x;
 325         int len1 = len&~1;
 326         for (x=0; x < len1 ; x += 2)
 327         {
 328           int i=imp_len;
 329           double sum=0.0,sum2=0.0;
 330           WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
 331           WDL_CONVO_IMPULSEBUFf *ip=imp;
 332           int j=i/4; i&=3;
 333           while (j--) // produce 2 samples, 4 impulse samples at a time
 334           {
 335             double a = ip[0],b=ip[1],aa=ip[2],bb=ip[3];
 336             double c = sp[1],d=sp[2],cc=sp[3];
 337             sum+=a * sp[0] + b * c + aa * d + bb * cc;
 338             sum2+=a * c + b * d + aa * cc + bb * sp[4];
 339             ip+=4;
 340             sp+=4;
 341           }
 342
 343           while (i--)
 344           {
 345             double a = *ip++;
 346             sum+=a * sp[0];
 347             sum2+=a * sp[1];
 348             sp++;
 349           }
 350           pout[x]=(WDL_FFT_REAL) sum;
 351           pout[x+1]=(WDL_FFT_REAL) sum2;
 352         }
 353         for(;x<len;x++) // any odd samples left
 354         {
 355           int i=imp_len;
 356           double sum=0.0;
 357           WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
 358           WDL_CONVO_IMPULSEBUFf *ip=imp;
 359           int j=i/4; i&=3;
 360           while (j--)
 361           {
 362             sum+=ip[0] * sp[0] + ip[1] * sp[1] + ip[2] * sp[2] + ip[3] * sp[3];
 363             ip+=4;
 364             sp+=4;
 365           }
 366
 367           while (i--) sum+=*ip++ * *sp++;
 368           pout[x]=(WDL_FFT_REAL) sum;
 369         }
 370         m_samplesin2[ch].Advance(len*sizeof(WDL_FFT_REAL));
 371         m_samplesin2[ch].Compact();
 372       }
 373       else
 374       {
 375         if (bufs && bufs[ch]) m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
 376         else
 377         {
 378           memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
 379         }
 380       }
 381
 382     }
 383     return;
 384   }
 385
 386
 387   int impchunksize=m_fft_size/2;
 388   int nblocks=(m_impulse_len+impchunksize-1)/impchunksize;
 389
 390   if (m_proc_nch != nch)
 391   {
 392     m_proc_nch=nch;
 393     memset(m_hist_pos,0,sizeof(m_hist_pos));
 394     int x;
 395     int mso=0;
 396     for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 397     {
 398       int so=m_samplesin[x].Available() + m_samplesout[x].Available();
 399       if (so>mso) mso=so;
 400
 401       if (x>=nch)
 402       {
 403         m_samplesin[x].Clear();
 404         m_samplesout[x].Clear();
 405       }
 406       else
 407       {
 408         if (m_impulse_len<1||!nblocks)
 409         {
 410           if (m_samplesin[x].Available())
 411           {
 412             int s=m_samplesin[x].Available();
 413             void *buf=m_samplesout[x].Add(NULL,s);
 414             m_samplesin[x].GetToBuf(0,buf,s);
 415             m_samplesin[x].Clear();
 416           }
 417         }
 418
 419         if (so < mso)
 420         {
 421           memset(m_samplesout[x].Add(NULL,mso-so),0,mso-so);
 422         }
 423       }
 424
 425       int sz=0;
 426       if (x<nch) sz=nblocks*m_fft_size;
 427
 428       memset(m_samplehist_zflag[x].Resize(nblocks),0,nblocks);
 429       m_samplehist[x].Resize(sz*2);
 430       m_overlaphist[x].Resize(x<nch ? m_fft_size/2 : 0);
 431       memset(m_samplehist[x].Get(),0,m_samplehist[x].GetSize()*sizeof(WDL_FFT_REAL));
 432       memset(m_overlaphist[x].Get(),0,m_overlaphist[x].GetSize()*sizeof(WDL_FFT_REAL));
 433     }
 434   }
 435
 436   int ch;
 437   if (m_impulse_len<1||!nblocks)
 438   {
 439     for (ch = 0; ch < nch; ch ++)
 440     {
 441       if (bufs && bufs[ch])
 442         m_samplesout[ch].Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
 443       else
 444         memset(m_samplesout[ch].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
 445     }
 446     // pass through
 447     return;
 448   }
 449
 450   for (ch = 0; ch < nch; ch ++)
 451   {
 452     if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
 453
 454     m_samplesin[ch].Add(bufs ? bufs[ch] : NULL,len*sizeof(WDL_FFT_REAL));
 455
 456   }
 457 }
 458
 459 void WDL_ConvolutionEngine::AddSilenceToOutput(int len, int nch)
 460 {
 461   int x;
 462   for(x=0;x<nch&&x<m_proc_nch;x++)
 463   {
 464     memset(m_samplesout[x].Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
 465   }
 466 }
 467
 // Processes queued input and reports how many output samples can be read.
 //   want - number of output samples the caller would like; in FFT mode a channel stops
 //          processing once its output queue holds at least this many samples.
 // Brute mode (m_fft_size<1): nothing is processed here; returns the number of samples
 // in channel 0's output queue.
 // FFT mode: for each channel, while at least m_fft_size/2 input samples are queued and
 // fewer than `want` output samples are queued, one block is transformed, multiplied with
 // every non-silent impulse block against the matching input-history block, inverse
 // transformed and overlap-added into the output queue.
 // Returns the smallest output sample count over channels 0..m_proc_nch-1 (or `want`
 // unchanged when m_proc_nch is not positive).
 468 int WDL_ConvolutionEngine::Avail(int want)
 469 {
 470   if (m_fft_size<1)
 471   {
 472     return m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
 473   }
 474
       // sz and chunksize are the same value: half the FFT size, the number of new samples per block
 475   const int sz=m_fft_size/2;
 476   const int chunksize=m_fft_size/2;
 477   const int nblocks=(m_impulse_len+chunksize-1)/chunksize;
 478   // clear combining buffer
 479   WDL_FFT_REAL *workbuf2 = m_combinebuf.Resize(m_fft_size*4); // temp space
 480
 481   int ch;
 482
 483   for (ch = 0; ch < m_proc_nch; ch ++)
 484   {
 485     if (!m_samplehist[ch].GetSize()||!m_overlaphist[ch].GetSize()) continue;
         // impulse channel used for this processing channel (clamped to the last impulse channel)
 486     int srcc=ch;
 487     if (srcc>=m_impulse_nch) srcc=m_impulse_nch-1;
 488
 489     bool allow_mono_input_mode=true;
 490     bool mono_impulse_mode=false;
 491
         // With a mono impulse, and a next channel whose queues are the same size as this one's,
         // channels ch and ch+1 are packed as the real/imaginary parts of one complex signal
         // and processed together.
 492     if (m_impulse_nch==1 && ch<m_proc_nch-1 &&
 493         m_samplehist[ch+1].GetSize()&&m_overlaphist[ch+1].GetSize() &&
 494         m_samplesin[ch].Available()==m_samplesin[ch+1].Available() &&
 495         m_samplesout[ch].Available()==m_samplesout[ch+1].Available()
 496         )
 497     { // 2x processing mode
 498       mono_impulse_mode=true;
 499       allow_mono_input_mode=false;
 500     }
 501
 502
 503     const int in_needed=sz;
 504
 505     // useSilentList[x] = 1 for mono signal, 2 for stereo, 0 for silent
 506     char *useSilentList=m_samplehist_zflag[ch].GetSize()==nblocks ? m_samplehist_zflag[ch].Get() : NULL;
 507     while (m_samplesin[ch].Available()/(int)sizeof(WDL_FFT_REAL) >= sz &&
 508            m_samplesout[ch].Available() < want*(int)sizeof(WDL_FFT_REAL))
 509     {
           // advance the circular history position (wraps to 0 at nblocks)
 510       int histpos;
 511       if ((histpos=++m_hist_pos[ch]) >= nblocks) histpos=m_hist_pos[ch]=0;
 512
 513       // get samples from input, to history
 514       WDL_FFT_REAL *optr = m_samplehist[ch].Get()+histpos*m_fft_size*2;
 515
           // the raw samples are staged at optr+sz, then unpacked below into interleaved
           // (real, imaginary) pairs starting at optr
 516       m_samplesin[ch].GetToBuf(0,optr+sz,in_needed*sizeof(WDL_FFT_REAL));
 517       m_samplesin[ch].Advance(in_needed*sizeof(WDL_FFT_REAL));
 518
 519
 520       bool mono_input_mode=false;
 521
           // nonzflag becomes true once any sample of this block exceeds the silence threshold
 522       bool nonzflag=false;
 523       if (mono_impulse_mode)
 524       {
             // channel ch goes into the real slots, channel ch+1 (staged in workbuf2) into the imaginary slots
 525         if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
 526         m_samplesin[ch+1].GetToBuf(0,workbuf2,sz*sizeof(WDL_FFT_REAL));
 527         m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
 528         int i;
 529         for (i = 0; i < sz; i ++) // unpack samples
 530         {
 531           WDL_FFT_REAL f = optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
 532           if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
 533           f=optr[i*2+1]=denormal_filter_aggressive(workbuf2[i]);
 534           if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
 535         }
 536       }
 537       else
 538       {
             // mono input mode: the next channel's queued input matches this block (within the
             // silence threshold) and this impulse channel has a successor, so one transform of
             // this block produces the output for both channels
 539         if (allow_mono_input_mode &&
 540           ch < m_proc_nch-1 &&
 541           srcc<m_impulse_nch-1 &&
 542           !CompareQueueToBuf(&m_samplesin[ch+1],optr+sz,sz*sizeof(WDL_FFT_REAL))
 543           )
 544         {
 545           mono_input_mode=true;
 546         }
 547         else
 548         {
               // once the comparison fails, it is not attempted again during this call for this channel
 549           allow_mono_input_mode=false;
 550         }
 551
 552         int i;
 553         for (i = 0; i < sz; i ++) // unpack samples
 554         {
 555           WDL_FFT_REAL f=optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
 556           optr[i*2+1]=0.0;
 557           if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
 558         }
 559       }
 560
           // mono input mode is abandoned if there is no silent list or any earlier history block is flagged 2
 561       int i;
 562       for (i = 1; mono_input_mode && i < nblocks; i ++) // start @ 1, since hist[histpos] is no longer used for here
 563       {
 564         int srchistpos = histpos-i;
 565         if (srchistpos < 0) srchistpos += nblocks;
 566         if (!useSilentList || useSilentList[srchistpos]==2) mono_input_mode=false;
 567       }
 568
           // zero the second half of the block (skipped for a silent block when silence flags are tracked)
 569       if (nonzflag||!useSilentList) memset(optr+sz*2,0,sz*2*sizeof(WDL_FFT_REAL));
 570
 571
 572 #ifdef WDLCONVO_ZL_ACCOUNTING
 573       m_zl_fftcnt++;
 574 #endif
 575
           // forward FFT of the new history block, skipped when the block is silent
 576       if (nonzflag) WDL_fft((WDL_FFT_COMPLEX*)optr,m_fft_size,0);
 577
 578       if (useSilentList) useSilentList[histpos]=nonzflag ? (mono_input_mode ? 1 : 2) : 0;
 579
           // mzfl: minimum impulse zero-flag value a block needs in order to be applied (see the loop below)
 580       int mzfl=2;
 581       if (mono_input_mode)
 582       {
 583         mzfl=1;
 584
             // the next channel's matching input is consumed without being processed separately
 585         m_samplesin[ch+1].Advance(sz*sizeof(WDL_FFT_REAL));
 586
 587         // save a valid copy in sample hist incase we switch from mono to stereo
 588         if (++m_hist_pos[ch+1] >= nblocks) m_hist_pos[ch+1]=0;
 589         WDL_FFT_REAL *optr2 = m_samplehist[ch+1].Get()+m_hist_pos[ch+1]*m_fft_size*2;
 590         memcpy(optr2,optr,m_fft_size*2*sizeof(WDL_FFT_REAL));
 591       }
 592
           // applycnt counts the impulse blocks accumulated into workbuf2 so far
 593       int applycnt=0;
 594       char *useImpSilentList=m_impulse_zflag[srcc].GetSize() == nblocks ? m_impulse_zflag[srcc].Get() : NULL;
 595
 596       WDL_CONVO_IMPULSEBUFf *impulseptr=m_impulse[srcc].Get();
 597       for (i = 0; i < nblocks; i ++, impulseptr+=m_fft_size*2)
 598       {
             // impulse block i is paired with the input block from i steps back in the circular history
 599         int srchistpos = histpos-i;
 600         if (srchistpos < 0) srchistpos += nblocks;
 601
 602         if (useImpSilentList && useImpSilentList[i]<mzfl) continue;
 603         if (useSilentList && !useSilentList[srchistpos]) continue; // silent block
 604
 605         WDL_FFT_REAL *samplehist=m_samplehist[ch].Get() + m_fft_size*srchistpos*2;
 606
 607         if (applycnt++) // add to output
 608           WDL_CONVO_CplxMul3((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
 609         else // replace output
 610           WDL_CONVO_CplxMul2((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
 611
 612       }
           // nothing applied: the result is silence; otherwise inverse FFT the accumulated product
 613       if (!applycnt)
 614         memset(workbuf2,0,m_fft_size*2*sizeof(WDL_FFT_REAL));
 615       else
 616         WDL_fft((WDL_FFT_COMPLEX*)workbuf2,m_fft_size,1);
 617
           // Overlap-add. workbuf2 holds interleaved (real, imaginary) pairs: p1 reads the first
           // half, p3 the second half, and p1o writes the compacted output over the start of workbuf2.
 618       WDL_FFT_REAL *olhist=m_overlaphist[ch].Get(); // errors from last time
 619       WDL_FFT_REAL *p1=workbuf2,*p3=workbuf2+m_fft_size,*p1o=workbuf2;
 620
 621       if (mono_impulse_mode||mono_input_mode)
 622       {
             // two channels: real parts go to channel ch, imaginary parts to channel ch+1
             // (the second channel's output is staged at workbuf2+m_fft_size*2)
 623         WDL_FFT_REAL *p2o=workbuf2+m_fft_size*2;
 624         WDL_FFT_REAL *olhist2=m_overlaphist[ch+1].Get(); // errors from last time
 625         int s=sz/2;
 626         while (s--)
 627         {
 628           p2o[0] = p1[1]+olhist2[0];
 629           p2o[1] = p1[3]+olhist2[1];
 630           p1o[0] = p1[0]+olhist[0];
 631           p1o[1] = p1[2]+olhist[1];
 632           p1o+=2;
 633           p2o+=2;
 634           p1+=4;
 635
               // the second half of the result becomes the overlap added to the next block
 636           olhist[0]=p3[0];
 637           olhist[1]=p3[2];
 638           olhist2[0]=p3[1];
 639           olhist2[1]=p3[3];
 640           p3+=4;
 641
 642           olhist+=2;
 643           olhist2+=2;
 644         }
 645         // add samples to output
 646         m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
 647         m_samplesout[ch+1].Add(workbuf2+m_fft_size*2,sz*sizeof(WDL_FFT_REAL));
 648       }
 649       else
 650       {
             // single channel: only the real parts are used
 651         int s=sz/2;
 652         while (s--)
 653         {
 654           p1o[0] = p1[0]+olhist[0];
 655           p1o[1] = p1[2]+olhist[1];
 656           p1o+=2;
 657           p1+=4;
 658
 659           olhist[0]=p3[0];
 660           olhist[1]=p3[2];
 661           p3+=4;
 662
 663           olhist+=2;
 664         }
 665         // add samples to output
 666         m_samplesout[ch].Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
 667       }
 668     } // while available
 669
         // in 2x mode channel ch+1 was handled together with ch, so skip it
 670     if (mono_impulse_mode) ch++;
 671   }
 672
       // result: smallest number of output samples queued on any processing channel
 673   int mv = want;
 674   for (ch=0;ch<m_proc_nch;ch++)
 675   {
 676     int v = m_samplesout[ch].Available()/sizeof(WDL_FFT_REAL);
 677     if (!ch || v<mv)mv=v;
 678   }
 679   return mv;
 680 }
 681
 682 WDL_FFT_REAL **WDL_ConvolutionEngine::Get()
 683 {
 684   int x;
 685   for (x = 0; x < m_proc_nch; x ++)
 686   {
 687     m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
 688   }
 689   return m_get_tmpptrs;
 690 }
 691
 692 void WDL_ConvolutionEngine::Advance(int len)
 693 {
 694   int x;
 695   for (x = 0; x < m_proc_nch; x ++)
 696   {
 697     m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
 698     m_samplesout[x].Compact();
 699   }
 700 }
 701
 702
 703
 704 /****************************************************************
 705 **  low latency version
 706 */
 707
 708 WDL_ConvolutionEngine_Div::WDL_ConvolutionEngine_Div()
 709 {
 710   timingInit();
 711   m_proc_nch=2;
 712   m_need_feedsilence=true;
 713 }
 714
 715 int WDL_ConvolutionEngine_Div::SetImpulse(WDL_ImpulseBuffer *impulse, int maxfft_size, int known_blocksize, int max_imp_size, int impulse_offset, int latency_allowed)
 716 {
 717   m_need_feedsilence=true;
 718
 719   m_engines.Empty(true);
 720   if (maxfft_size<0)maxfft_size=-maxfft_size;
 721   maxfft_size*=2;
 722   if (!maxfft_size || maxfft_size>32768) maxfft_size=32768;
 723
 724
 725   const int MAX_SIZE_FOR_BRUTE=64;
 726
 727   int fftsize = MAX_SIZE_FOR_BRUTE;
 728   int impulsechunksize = MAX_SIZE_FOR_BRUTE;
 729
 730   if (known_blocksize && !(known_blocksize&(known_blocksize-1)) && known_blocksize>MAX_SIZE_FOR_BRUTE*2)
 731   {
 732     fftsize=known_blocksize/2;
 733     impulsechunksize=known_blocksize/2;
 734   }
 735   if (latency_allowed*2 > fftsize)
 736   {
 737     int x = 16;
 738     while (x <= latency_allowed) x*=2;
 739     if (x>32768) x=32768;
 740     fftsize=impulsechunksize=x;
 741   }
 742
 743   int offs=0;
 744   int samplesleft=impulse->impulses[0].GetSize()-impulse_offset;
 745   if (max_imp_size>0 && samplesleft>max_imp_size) samplesleft=max_imp_size;
 746
 747   do
 748   {
 749     WDL_ConvolutionEngine *eng=new WDL_ConvolutionEngine;
 750
 751     bool wantBrute = !latency_allowed && !offs;
 752     if (impulsechunksize*(wantBrute ? 2 : 3) >= samplesleft) impulsechunksize=samplesleft; // early-out, no point going to a larger FFT (since if we did this, we wouldnt have enough samples for a complete next pass)
 753     if (fftsize>=maxfft_size) { impulsechunksize=samplesleft; fftsize=maxfft_size; } // if FFTs are as large as possible, finish up
 754
 755     eng->SetImpulse(impulse,fftsize,offs+impulse_offset,impulsechunksize, wantBrute);
 756     eng->m_zl_delaypos = offs;
 757     eng->m_zl_dumpage=0;
 758     m_engines.Add(eng);
 759
 760 #ifdef WDLCONVO_ZL_ACCOUNTING
 761     char buf[512];
 762     wsprintf(buf,"ce%d: offs=%d, len=%d, fftsize=%d\n",m_engines.GetSize(),offs,impulsechunksize,fftsize);
 763     OutputDebugString(buf);
 764 #endif
 765
 766     samplesleft -= impulsechunksize;
 767     offs+=impulsechunksize;
 768
 769 #if 1 // this seems about 10% faster (maybe due to better cache use from less sized ffts used?)
 770     impulsechunksize=offs*3;
 771     fftsize=offs*2;
 772 #else
 773     impulsechunksize=fftsize;
 774
 775     fftsize*=2;
 776 #endif
 777   }
 778   while (samplesleft > 0);
 779
 780   return GetLatency();
 781 }
 782
 783 int WDL_ConvolutionEngine_Div::GetLatency()
 784 {
 785   return m_engines.GetSize() ? m_engines.Get(0)->GetLatency() : 0;
 786 }
 787
 788
 789 void WDL_ConvolutionEngine_Div::Reset()
 790 {
 791   int x;
 792   for (x = 0; x < m_engines.GetSize(); x ++)
 793   {
 794     WDL_ConvolutionEngine *eng=m_engines.Get(x);
 795     eng->Reset();
 796   }
 797   for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
 798   {
 799     m_samplesout[x].Clear();
 800   }
 801
 802   m_need_feedsilence=true;
 803 }
 804
 // Destructor: prints timing information, then empties the engine list.
 805 WDL_ConvolutionEngine_Div::~WDL_ConvolutionEngine_Div()
 806 {
 807   timingPrint();
       // the engines were allocated with new in SetImpulse(); Empty(true) presumably deletes them -- TODO confirm
 808   m_engines.Empty(true);
 809 }
 810
 811 void WDL_ConvolutionEngine_Div::Add(WDL_FFT_REAL **bufs, int len, int nch)
 812 {
 813   m_proc_nch=nch;
 814
 815   bool ns=m_need_feedsilence;
 816   m_need_feedsilence=false;
 817
 818   int x;
 819   for (x = 0; x < m_engines.GetSize(); x ++)
 820   {
 821     WDL_ConvolutionEngine *eng=m_engines.Get(x);
 822     if (ns)
 823     {
 824       eng->m_zl_dumpage = (x>0 && x < m_engines.GetSize()-1) ? (eng->GetLatency()/4) : 0; // reduce max number of ffts per block by staggering them
 825
 826       if (eng->m_zl_dumpage>0)
 827         eng->Add(NULL,eng->m_zl_dumpage,nch); // added silence to input (to control when fft happens)
 828     }
 829
 830     eng->Add(bufs,len,nch);
 831
 832     if (ns) eng->AddSilenceToOutput(eng->m_zl_delaypos,nch); // add silence to output (to delay output to its correct time)
 833
 834   }
 835 }
 836 WDL_FFT_REAL **WDL_ConvolutionEngine_Div::Get()
 837 {
 838   int x;
 839   for (x = 0; x < m_proc_nch; x ++)
 840   {
 841     m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout[x].Get();
 842   }
 843   return m_get_tmpptrs;
 844 }
 845
 846 void WDL_ConvolutionEngine_Div::Advance(int len)
 847 {
 848   int x;
 849   for (x = 0; x < m_proc_nch; x ++)
 850   {
 851     m_samplesout[x].Advance(len*sizeof(WDL_FFT_REAL));
 852     m_samplesout[x].Compact();
 853   }
 854 }
 855
 856 int WDL_ConvolutionEngine_Div::Avail(int wantSamples)
 857 {
 858   timingEnter(1);
 859   int wso=wantSamples;
 860   int x;
 861 #ifdef WDLCONVO_ZL_ACCOUNTING
 862   int cnt=0;
 863   static int maxcnt=-1;
 864   int h=0;
 865 #endif
 866   for (x = 0; x < m_engines.GetSize(); x ++)
 867   {
 868     WDL_ConvolutionEngine *eng=m_engines.Get(x);
 869 #ifdef WDLCONVO_ZL_ACCOUNTING
 870     eng->m_zl_fftcnt=0;
 871 #endif
 872     int a=eng->Avail(wso+eng->m_zl_dumpage) - eng->m_zl_dumpage;
 873 #ifdef WDLCONVO_ZL_ACCOUNTING
 874     cnt += !!eng->m_zl_fftcnt;
 875
 876 #if 0
 877     if (eng->m_zl_fftcnt)
 878       h|=1<<x;
 879
 880     if (eng->m_zl_fftcnt && x==m_engines.GetSize()-1 && cnt>1)
 881     {
 882       char buf[512];
 883       wsprintf(buf,"fft flags=%08x (%08x=max)\n",h,1<<x);
 884       OutputDebugString(buf);
 885     }
 886 #endif
 887 #endif
 888     if (a < wantSamples) wantSamples=a;
 889   }
 890
 891 #ifdef WDLCONVO_ZL_ACCOUNTING
 892   static DWORD lastt=0;
 893   if (cnt>maxcnt)maxcnt=cnt;
 894   if (GetTickCount()>lastt+1000)
 895   {
 896     lastt=GetTickCount();
 897     char buf[512];
 898     wsprintf(buf,"maxcnt=%d\n",maxcnt);
 899     OutputDebugString(buf);
 900     maxcnt=-1;
 901   }
 902 #endif
 903   if (wantSamples>0)
 904   {
 905     WDL_FFT_REAL *tp[WDL_CONVO_MAX_PROC_NCH];
 906     for (x =0; x < m_proc_nch; x ++)
 907     {
 908       memset(tp[x]=(WDL_FFT_REAL*)m_samplesout[x].Add(NULL,wantSamples*sizeof(WDL_FFT_REAL)),0,wantSamples*sizeof(WDL_FFT_REAL));
 909     }
 910
 911     for (x = 0; x < m_engines.GetSize(); x ++)
 912     {
 913       WDL_ConvolutionEngine *eng=m_engines.Get(x);
 914       if (eng->m_zl_dumpage>0) { eng->Advance(eng->m_zl_dumpage); eng->m_zl_dumpage=0; }
 915
 916       WDL_FFT_REAL **p=eng->Get();
 917       if (p)
 918       {
 919         int i;
 920         for (i =0; i < m_proc_nch; i ++)
 921         {
 922           WDL_FFT_REAL *o=tp[i];
 923           WDL_FFT_REAL *in=p[i];
 924           int j=wantSamples;
 925           while (j-->0) *o++ += *in++;
 926         }
 927       }
 928       eng->Advance(wantSamples);
 929     }
 930   }
 931   timingLeave(1);
 932
 933   int av=m_samplesout[0].Available()/sizeof(WDL_FFT_REAL);
 934   return av>wso ? wso : av;
 935 }
 936
 937
 938 #ifdef WDL_TEST_CONVO
 939
 940 #include <stdio.h>
 941
 942 int main(int argc, char **argv)
 943 {
 944   if (argc!=5)
 945   {
 946     printf("usage: convoengine fftsize implen oneoffs pingoffs\n");
 947     return -1;
 948   }
 949
 950   int fftsize=atoi(argv[1]);
 951   int implen = atoi(argv[2]);
 952   int oneoffs = atoi(argv[3]);
 953   int pingoffs=atoi(argv[4]);
 954
 955   if (implen < 1 || oneoffs < 0 || oneoffs >= implen || pingoffs < 0)
 956   {
 957     printf("invalid parameters\n");
 958     return -1;
 959   }
 960
 961   WDL_ImpulseBuffer imp;
 962   imp.nch=1;
 963   memset(imp.impulses[0].Resize(implen),0,implen*sizeof(WDL_FFT_REAL));
 964   imp.impulses[0].Get()[oneoffs]=1.0;
 965
 966
 967 #if WDL_TEST_CONVO==2
 968   WDL_ConvolutionEngine_Div engine;
 969 #else
 970   WDL_ConvolutionEngine engine;
 971 #endif
 972   engine.SetImpulse(&imp,fftsize);
 973   WDL_TypedBuf<WDL_FFT_REAL> m_tmpbuf;
 974   memset(m_tmpbuf.Resize(pingoffs+1),0,pingoffs*sizeof(WDL_FFT_REAL));
 975   m_tmpbuf.Get()[pingoffs]=1.0;
 976   WDL_FFT_REAL *p=m_tmpbuf.Get();
 977   engine.Add(&p,pingoffs+1,1);
 978
 979   p=m_tmpbuf.Resize(4096);
 980   memset(p,0,m_tmpbuf.GetSize()*sizeof(WDL_FFT_REAL));
 981
 982   int avail;
 983   while ((avail=engine.Avail(pingoffs+oneoffs + 8192)) < pingoffs+oneoffs + 8192)
 984   {
 985     engine.Add(&p,4096,1);
 986   }
 987   WDL_FFT_REAL **output = engine.Get();
 988   if (!output || !*output)
 989   {
 990     printf("cant get output\n");
 991     return -1;
 992   }
 993   int x;
 994   for (x = 0; x < avail; x ++)
 995   {
 996     WDL_FFT_REAL val=output[0][x];
 997     WDL_FFT_REAL expval = (x==pingoffs+oneoffs) ? 1.0:0.0;
 998     if (fabs(val-expval)>0.000000001)
 999     {
1000       printf("%d: %.4fdB - %f %f\n",x,log10(max(val,0.000000000001))*20.0 - log10(max(expval,0.000000000001))*20.0,val,expval);
1001     }
1002   }
1003
1004   return 0;
1005 }
1006
1007 #endif
1008
1009
1010 int WDL_ImpulseBuffer::SetLength(int samples)
1011 {
1012   int x;
1013   for(x=0;x<m_nch;x++)
1014   {
1015     int cursz=impulses[x].GetSize();
1016     if (cursz!=samples)
1017     {
1018       impulses[x].Resize(samples,false);
1019
1020       if (impulses[x].GetSize()!=samples) // validate length!
1021       {
1022         // ERROR! FREE ALL!
1023         for(x=0;x<WDL_CONVO_MAX_IMPULSE_NCH;x++) impulses[x].Resize(0);
1024         return 0;
1025       }
1026     }
1027
1028     if (cursz<samples)
1029       memset(impulses[x].Get()+cursz,0,(samples-cursz)*sizeof(WDL_FFT_REAL));
1030   }
1031   return impulses[0].GetSize();
1032 }
1033
1034
1035 void WDL_ImpulseBuffer::SetNumChannels(int usench)
1036 {
1037   if (usench<1) usench=1;
1038   else if (usench>WDL_CONVO_MAX_IMPULSE_NCH) usench=WDL_CONVO_MAX_IMPULSE_NCH;
1039
1040   if (usench > m_nch)
1041   {
1042     const int old_nch = m_nch;
1043     m_nch = usench;
1044     const int len = SetLength(GetLength());
1045
1046     int x,ax=0;
1047     if (len>0) for(x=old_nch;x<usench;x++)
1048     {
1049       memcpy(impulses[x].Get(),impulses[ax].Get(),len*sizeof(WDL_FFT_REAL)); // duplicate channels
1050       if (++ax>=old_nch) ax=0;
1051     }
1052   }
1053   else if (usench<m_nch)
1054   {
1055     m_nch=usench;
1056     int x;
1057     for(x=usench;x<WDL_CONVO_MAX_IMPULSE_NCH;x++) impulses[x].Resize(0,false);
1058   }
1059 }