r123: Merged HEAD and TEST. New stuff shall be committed to HEAD from now on.
[cinelerra_cv/mob.git] / plugins / toolame / psy.c
blobb9fdf0d16da5cc09829b8c35a1cd6dc30a0c2366
1 #include <stdlib.h>
2 #include "common.h"
3 #include "encoder.h"
4 #include "absthr.h"
6 /* The static variables "r", "phi_sav", "new", "old" and "oldest" have */
7 /* to be remembered for the unpredictability measure. For "r" and */
8 /* "phi_sav", the first index from the left is the channel select and */
9 /* the second index is the "age" of the data. */
11 static int new = 0, old = 1, oldest = 0;
12 static int init = 0, flush, sync_flush, syncsize, sfreq_idx;
14 static double nmt = 5.5;
16 static FLOAT crit_band[27] = {
17 0.0, 100.0, 200.0, 300.0, 400.0, 510.0, 630.0, 770.0,
18 920.0, 1080.0, 1270.0, 1480.0, 1720.0, 2000.0, 2320.0, 2700.0,
19 3150.0, 3700.0, 4400.0, 5300.0, 6400.0, 7700.0, 9500.0, 12000.0,
20 15500.0, 25000.0, 30000.0
23 static FLOAT bmax[27] = { 20.0, 20.0, 20.0, 20.0, 20.0, 17.0, 15.0,
24 10.0, 7.0, 4.4, 4.5, 4.5, 4.5, 4.5,
25 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
26 4.5, 4.5, 4.5, 3.5, 3.5, 3.5
30 static FLOAT *grouped_c, *grouped_e, *nb, *cb, *ecb, *bc;
31 static FLOAT *wsamp_r, *wsamp_i, *phi, *energy;
32 static FLOAT *c, *fthr;
33 static F32 *snrtmp;
35 static int *numlines;
36 static int *partition;
37 static FLOAT *cbval, *rnorm;
38 static FLOAT *window;
39 static FLOAT *absthr;
40 static double *tmn;
41 static FCB *s;
42 static FHBLK *lthr;
43 static F2HBLK *r, *phi_sav;
45 void
46 cleanup_psycho_two ()
48 free (grouped_c);
49 free (grouped_e);
50 free (nb);
51 free (cb);
52 free (ecb);
53 free (bc);
54 free (wsamp_r);
55 free (wsamp_i);
56 free (phi);
57 free (energy);
58 free (c);
59 free (fthr);
60 free (snrtmp);
61 free (numlines);
62 free (partition);
63 free (cbval);
64 free (rnorm);
65 free (window);
66 free (tmn);
67 free (s);
68 free (lthr);
69 free (r);
70 free (phi_sav);
71 init = 0;
74 void
75 init_psycho_two (double sfreq)
78 unsigned int i, j;
79 FLOAT freq_mult, bval_lo;
80 double temp1, temp2, temp3;
82 grouped_c = (FLOAT *) mem_alloc (sizeof (FCB), "grouped_c");
83 grouped_e = (FLOAT *) mem_alloc (sizeof (FCB), "grouped_e");
84 nb = (FLOAT *) mem_alloc (sizeof (FCB), "nb");
85 cb = (FLOAT *) mem_alloc (sizeof (FCB), "cb");
86 ecb = (FLOAT *) mem_alloc (sizeof (FCB), "ecb");
87 bc = (FLOAT *) mem_alloc (sizeof (FCB), "bc");
88 wsamp_r = (FLOAT *) mem_alloc (sizeof (FBLK), "wsamp_r");
89 wsamp_i = (FLOAT *) mem_alloc (sizeof (FBLK), "wsamp_i");
90 phi = (FLOAT *) mem_alloc (sizeof (FBLK), "phi");
91 energy = (FLOAT *) mem_alloc (sizeof (FBLK), "energy");
92 c = (FLOAT *) mem_alloc (sizeof (FHBLK), "c");
93 fthr = (FLOAT *) mem_alloc (sizeof (FHBLK), "fthr");
94 snrtmp = (F32 *) mem_alloc (sizeof (F2_32), "snrtmp");
96 numlines = (int *) mem_alloc (sizeof (ICB), "numlines");
97 partition = (int *) mem_alloc (sizeof (IHBLK), "partition");
98 cbval = (FLOAT *) mem_alloc (sizeof (FCB), "cbval");
99 rnorm = (FLOAT *) mem_alloc (sizeof (FCB), "rnorm");
100 window = (FLOAT *) mem_alloc (sizeof (FBLK), "window");
101 tmn = (double *) mem_alloc (sizeof (DCB), "tmn");
102 s = (FCB *) mem_alloc (sizeof (FCBCB), "s");
103 lthr = (FHBLK *) mem_alloc (sizeof (F2HBLK), "lthr");
104 r = (F2HBLK *) mem_alloc (sizeof (F22HBLK), "r");
105 phi_sav = (F2HBLK *) mem_alloc (sizeof (F22HBLK), "phi_sav");
107 i = sfreq + 0.5;
108 switch (i)
110 case 32000:
111 sfreq_idx = 0;
112 break;
113 case 44100:
114 sfreq_idx = 1;
115 break;
116 case 48000:
117 sfreq_idx = 2;
118 break;
119 default:
120 fprintf (stderr, "error, invalid sampling frequency: %d Hz\n", i);
121 exit (-1);
123 fprintf (stderr, "absthr[][] sampling frequency index: %d\n", sfreq_idx);
124 absthr = absthr_table[sfreq_idx];
126 flush = 384 * 3.0 / 2.0;
127 syncsize = 1056;
128 sync_flush = syncsize - flush;
130 /* calculate HANN window coefficients */
131 /* for(i=0;i<BLKSIZE;i++)window[i]=0.5*(1-cos(2.0*PI*i/(BLKSIZE-1.0))); */
132 for (i = 0; i < BLKSIZE; i++)
133 window[i] = 0.5 * (1 - cos (2.0 * PI * (i - 0.5) / BLKSIZE));
134 /* reset states used in unpredictability measure */
135 for (i = 0; i < HBLKSIZE; i++)
137 r[0][0][i] = r[1][0][i] = r[0][1][i] = r[1][1][i] = 0;
138 phi_sav[0][0][i] = phi_sav[1][0][i] = 0;
139 phi_sav[0][1][i] = phi_sav[1][1][i] = 0;
140 lthr[0][i] = 60802371420160.0;
141 lthr[1][i] = 60802371420160.0;
143 /*****************************************************************************
144 * Initialization: Compute the following constants for use later *
145 * partition[HBLKSIZE] = the partition number associated with each *
146 * frequency line *
147 * cbval[CRITBANDS] = the center (average) bark value of each *
148 * partition *
149 * numlines[CRITBANDS] = the number of frequency lines in each partition *
150 * tmn[CRITBANDS] = tone masking noise *
151 *****************************************************************************/
152 /* compute fft frequency multiplicand */
153 freq_mult = sfreq / BLKSIZE;
155 /* calculate fft frequency, then bval of each line (use fthr[] as tmp storage)*/
156 for (i = 0; i < HBLKSIZE; i++)
158 temp1 = i * freq_mult;
159 j = 1;
160 while (temp1 > crit_band[j])
161 j++;
162 fthr[i] =
163 j - 1 + (temp1 - crit_band[j - 1]) / (crit_band[j] -
164 crit_band[j - 1]);
166 partition[0] = 0;
167 /* temp2 is the counter of the number of frequency lines in each partition */
168 temp2 = 1;
169 cbval[0] = fthr[0];
170 bval_lo = fthr[0];
171 for (i = 1; i < HBLKSIZE; i++)
173 if ((fthr[i] - bval_lo) > 0.33)
175 partition[i] = partition[i - 1] + 1;
176 cbval[partition[i - 1]] = cbval[partition[i - 1]] / temp2;
177 cbval[partition[i]] = fthr[i];
178 bval_lo = fthr[i];
179 numlines[partition[i - 1]] = temp2;
180 temp2 = 1;
182 else
184 partition[i] = partition[i - 1];
185 cbval[partition[i]] += fthr[i];
186 temp2++;
189 numlines[partition[i - 1]] = temp2;
190 cbval[partition[i - 1]] = cbval[partition[i - 1]] / temp2;
192 /************************************************************************
193 * Now compute the spreading function, s[j][i], the value of the spread-*
194 * ing function, centered at band j, for band i, store for later use *
195 ************************************************************************/
196 for (j = 0; j < CRITBANDS; j++)
198 for (i = 0; i < CRITBANDS; i++)
200 temp1 = (cbval[i] - cbval[j]) * 1.05;
201 if (temp1 >= 0.5 && temp1 <= 2.5)
203 temp2 = temp1 - 0.5;
204 temp2 = 8.0 * (temp2 * temp2 - 2.0 * temp2);
206 else
207 temp2 = 0;
208 temp1 += 0.474;
209 temp3 =
210 15.811389 + 7.5 * temp1 -
211 17.5 * sqrt ((double) (1.0 + temp1 * temp1));
212 if (temp3 <= -100)
213 s[i][j] = 0;
214 else
216 temp3 = (temp2 + temp3) * LN_TO_LOG10;
217 s[i][j] = exp (temp3);
222 /* Calculate Tone Masking Noise values */
223 for (j = 0; j < CRITBANDS; j++)
225 temp1 = 15.5 + cbval[j];
226 tmn[j] = (temp1 > 24.5) ? temp1 : 24.5;
227 /* Calculate normalization factors for the net spreading functions */
228 rnorm[j] = 0;
229 for (i = 0; i < CRITBANDS; i++)
231 rnorm[j] += s[j][i];
238 void
239 psycho_two (short int *buffer, short int savebuf[1056], int chn,
240 FLOAT snr32[32], double sfreq)
242 unsigned int i, j, k;
243 FLOAT r_prime, phi_prime;
244 FLOAT minthres, sum_energy;
245 double tb, temp1, temp2, temp3;
247 #ifdef GOOB
248 if (init == 0)
250 init++;
251 init_psycho_two (sfreq);
253 #endif
255 for (i = 0; i < 2; i++)
257 /*****************************************************************************
258 * Net offset is 480 samples (1056-576) for layer 2; this is because one must*
259 * stagger input data by 256 samples to synchronize psychoacoustic model with*
260 * filter bank outputs, then stagger so that center of 1024 FFT window lines *
261 * up with center of 576 "new" audio samples. *
263 * For layer 1, the input data still needs to be staggered by 256 samples, *
264 * then it must be staggered again so that the 384 "new" samples are centered*
265 * in the 1024 FFT window. The net offset is then 576 and you need 448 "new"*
266 * samples for each iteration to keep the 384 samples of interest centered *
267 *****************************************************************************/
268 for (j = 0; j < syncsize; j++)
270 if (j < (sync_flush))
271 savebuf[j] = savebuf[j + flush];
272 else
273 savebuf[j] = *buffer++;
274 if (j < BLKSIZE)
276 wsamp_r[j] = window[j] * ((FLOAT) savebuf[j]);
277 wsamp_i[j] = 0;
281 fft (wsamp_r, wsamp_i, energy, phi, 1024);
283 /*****************************************************************************
284 * calculate the unpredictability measure, given energy[f] and phi[f] *
285 *****************************************************************************/
286 /*only update data "age" pointers after you are done with both channels */
287 /*for layer 1 computations, for the layer 2 double computations, the pointers*/
288 /*are reset automatically on the second pass */
290 if (new == 0)
292 new = 1;
293 oldest = 1;
295 else
297 new = 0;
298 oldest = 0;
300 if (old == 0)
301 old = 1;
302 else
303 old = 0;
305 for (j = 0; j < HBLKSIZE; j++)
307 r_prime = 2.0 * r[chn][old][j] - r[chn][oldest][j];
308 phi_prime = 2.0 * phi_sav[chn][old][j] - phi_sav[chn][oldest][j];
309 r[chn][new][j] = sqrt ((double) energy[j]);
310 phi_sav[chn][new][j] = phi[j];
311 temp1 =
312 r[chn][new][j] * cos ((double) phi[j]) -
313 r_prime * cos ((double) phi_prime);
314 temp2 =
315 r[chn][new][j] * sin ((double) phi[j]) -
316 r_prime * sin ((double) phi_prime);
317 temp3 = r[chn][new][j] + fabs ((double) r_prime);
318 if (temp3 != 0)
319 c[j] = sqrt (temp1 * temp1 + temp2 * temp2) / temp3;
320 else
321 c[j] = 0;
323 /*****************************************************************************
324 * Calculate the grouped, energy-weighted, unpredictability measure, *
325 * grouped_c[], and the grouped energy. grouped_e[] *
326 *****************************************************************************/
327 for (j = 1; j < CRITBANDS; j++)
329 grouped_e[j] = 0;
330 grouped_c[j] = 0;
332 grouped_e[0] = energy[0];
333 grouped_c[0] = energy[0] * c[0];
334 for (j = 1; j < HBLKSIZE; j++)
336 grouped_e[partition[j]] += energy[j];
337 grouped_c[partition[j]] += energy[j] * c[j];
340 /*****************************************************************************
341 * convolve the grouped energy-weighted unpredictability measure *
342 * and the grouped energy with the spreading function, s[j][k] *
343 *****************************************************************************/
344 for (j = 0; j < CRITBANDS; j++)
346 ecb[j] = 0;
347 cb[j] = 0;
348 for (k = 0; k < CRITBANDS; k++)
350 if (s[j][k] != 0.0)
352 ecb[j] += s[j][k] * grouped_e[k];
353 cb[j] += s[j][k] * grouped_c[k];
356 if (ecb[j] != 0)
357 cb[j] = cb[j] / ecb[j];
358 else
359 cb[j] = 0;
362 /*****************************************************************************
363 * Calculate the required SNR for each of the frequency partitions *
364 * this whole section can be accomplished by a table lookup *
365 *****************************************************************************/
366 for (j = 0; j < CRITBANDS; j++)
368 if (cb[j] < .05)
369 cb[j] = 0.05;
370 else if (cb[j] > .5)
371 cb[j] = 0.5;
372 tb = -0.434294482 * log ((double) cb[j]) - 0.301029996;
373 cb[j] = tb;
374 bc[j] = tmn[j] * tb + nmt * (1.0 - tb);
375 k = cbval[j] + 0.5;
376 bc[j] = (bc[j] > bmax[k]) ? bc[j] : bmax[k];
377 bc[j] = exp ((double) -bc[j] * LN_TO_LOG10);
380 /*****************************************************************************
381 * Calculate the permissible noise energy level in each of the frequency *
382 * partitions. Include absolute threshold and pre-echo controls *
383 * this whole section can be accomplished by a table lookup *
384 *****************************************************************************/
385 for (j = 0; j < CRITBANDS; j++)
386 if (rnorm[j] && numlines[j])
387 nb[j] = ecb[j] * bc[j] / (rnorm[j] * numlines[j]);
388 else
389 nb[j] = 0;
390 for (j = 0; j < HBLKSIZE; j++)
392 /*temp1 is the preliminary threshold */
393 temp1 = nb[partition[j]];
394 temp1 = (temp1 > absthr[j]) ? temp1 : absthr[j];
395 /*do not use pre-echo control for layer 2 because it may do bad things to the*/
396 /* MUSICAM bit allocation algorithm */
397 /* if(lay==1){
398 fthr[j] = (temp1 < lthr[chn][j]) ? temp1 : lthr[chn][j];
399 temp2 = temp1 * 0.00316;
400 fthr[j] = (temp2 > fthr[j]) ? temp2 : fthr[j];
402 else fthr[j] = temp1; */
403 fthr[j] = temp1;
404 lthr[chn][j] = LXMIN * temp1;
407 /*****************************************************************************
408 * Translate the 512 threshold values to the 32 filter bands of the coder *
409 *****************************************************************************/
410 for (j = 0; j < 193; j += 16)
412 minthres = 60802371420160.0;
413 sum_energy = 0.0;
414 for (k = 0; k < 17; k++)
416 if (minthres > fthr[j + k])
417 minthres = fthr[j + k];
418 sum_energy += energy[j + k];
420 snrtmp[i][j / 16] = sum_energy / (minthres * 17.0);
421 snrtmp[i][j / 16] = 4.342944819 * log ((double) snrtmp[i][j / 16]);
423 for (j = 208; j < (HBLKSIZE - 1); j += 16)
425 minthres = 0.0;
426 sum_energy = 0.0;
427 for (k = 0; k < 17; k++)
429 minthres += fthr[j + k];
430 sum_energy += energy[j + k];
432 snrtmp[i][j / 16] = sum_energy / minthres;
433 snrtmp[i][j / 16] = 4.342944819 * log ((double) snrtmp[i][j / 16]);
435 /*****************************************************************************
436 * End of Psychoacuostic calculation loop *
437 *****************************************************************************/
439 for (i = 0; i < 32; i++)
441 snr32[i] = (snrtmp[0][i] > snrtmp[1][i]) ? snrtmp[0][i] : snrtmp[1][i];