r123: Merged HEAD and TEST. New stuff shall be committed to HEAD from now on.
[cinelerra_cv/mob.git] / plugins / toolame / tonal.c
blob23b9a6e325b79f18dd40ce709e6ecad78f6a0e5c
1 #include <stdlib.h>
2 #include "common.h"
3 #include "encoder.h"
6 #define LONDON /* enable "LONDON" modification */
7 #define MAKE_SENSE /* enable "MAKE_SENSE" modification */
8 #define MI_OPTION /* enable "MI_OPTION" modification */
9 /**********************************************************************
11 * This module implements the psychoacoustic model I for the
12 * MPEG encoder layer II. It uses simplified tonal and noise masking
13 * threshold analysis to generate SMR for the encoder bit allocation
14 * routine.
16 **********************************************************************/
18 int crit_band;
19 int *cbound;
20 int sub_size;
22 double
23 add_db (double a, double b)
25 a = pow (10.0, a / 10.0);
26 b = pow (10.0, b / 10.0);
27 return 10 * log10 (a + b);
30 void
31 read_cbound (int freq)
34 #include "critband.h"
35 //static const int FirstCriticalBand[7][27] = {...
37 int i, k;
39 if ((freq < 0) || (freq > 6) || (freq == 3))
41 fprintf (stderr, "Internal error (read_cbound())\n");
42 return;
46 crit_band = CriticalBands[freq][0];
47 cbound = (int *) mem_alloc (sizeof (int) * crit_band, "cbound");
48 for (i = 0; i < crit_band; i++)
50 k = CriticalBands[freq][i + 1];
51 if (k != 0)
53 cbound[i] = k;
55 else
57 fprintf (stderr, "Internal error (read_cbound())\n");
58 return;
65 void
66 read_freq_band (g_ptr * ltg, int freq)
70 #include "freqtable.h"
71 int i, k;
73 if ((freq < 0) || (freq > 6) || (freq == 3))
75 fprintf (stderr, "Internal error (read_freq_band())\n");
76 return;
80 sub_size = SecondFreqEntries[freq] + 1;
81 *ltg = (g_ptr) mem_alloc (sizeof (g_thres) * sub_size, "ltg");
82 (*ltg)[0].line = 0; /* initialize global masking threshold */
83 (*ltg)[0].bark = 0.0;
84 (*ltg)[0].hear = 0.0;
85 for (i = 1; i < sub_size; i++)
87 k = SecondFreqSubband[freq][i - 1].line;
88 if (k != 0)
90 (*ltg)[i].line = k;
91 (*ltg)[i].bark = SecondFreqSubband[freq][i - 1].bark;
92 (*ltg)[i].hear = SecondFreqSubband[freq][i - 1].hear;
94 else
96 printf ("Internal error (read_freq_band())\n");
97 return;
104 void
105 make_map (mask power[HAN_SIZE], g_thres * ltg)
106 /* this function calculates the global masking threshold */
108 int i, j;
110 for (i = 1; i < sub_size; i++)
111 for (j = ltg[i - 1].line; j <= ltg[i].line; j++)
112 power[j].map = i;
115 /****************************************************************
117 * Fast Fourier transform of the input samples.
119 ****************************************************************/
120 /* FIXME: meld this with the FFT in subs.c */
121 void
122 f_f_t (double sample[FFT_SIZE], mask power[HAN_SIZE])
124 int i, j, k, L, l = 0;
125 int ip, le, le1;
126 double t_r, t_i, u_r, u_i;
127 static int M, MM1, init = 0, N;
128 double *x_r, *x_i, *energy;
129 static int *rev;
130 static double *w_r, *w_i;
132 x_r = (double *) mem_alloc (sizeof (DFFT), "x_r");
133 x_i = (double *) mem_alloc (sizeof (DFFT), "x_i");
134 energy = (double *) mem_alloc (sizeof (DFFT), "energy");
135 for (i = 0; i < FFT_SIZE; i++)
136 x_r[i] = x_i[i] = energy[i] = 0;
137 if (!init)
139 rev = (int *) mem_alloc (sizeof (IFFT), "rev");
140 w_r = (double *) mem_alloc (sizeof (D10), "w_r");
141 w_i = (double *) mem_alloc (sizeof (D10), "w_i");
142 M = 10;
143 MM1 = 9;
144 N = FFT_SIZE;
145 for (L = 0; L < M; L++)
147 le = 1 << (M - L);
148 le1 = le >> 1;
149 w_r[L] = cos (PI / le1);
150 w_i[L] = -sin (PI / le1);
152 for (i = 0; i < FFT_SIZE; rev[i] = l, i++)
153 for (j = 0, l = 0; j < 10; j++)
155 k = (i >> j) & 1;
156 l |= (k << (9 - j));
158 init = 1;
160 memcpy ((char *) x_r, (char *) sample, sizeof (double) * FFT_SIZE);
161 for (L = 0; L < MM1; L++)
163 le = 1 << (M - L);
164 le1 = le >> 1;
165 u_r = 1;
166 u_i = 0;
167 for (j = 0; j < le1; j++)
169 for (i = j; i < N; i += le)
171 ip = i + le1;
172 t_r = x_r[i] + x_r[ip];
173 t_i = x_i[i] + x_i[ip];
174 x_r[ip] = x_r[i] - x_r[ip];
175 x_i[ip] = x_i[i] - x_i[ip];
176 x_r[i] = t_r;
177 x_i[i] = t_i;
178 t_r = x_r[ip];
179 x_r[ip] = x_r[ip] * u_r - x_i[ip] * u_i;
180 x_i[ip] = x_i[ip] * u_r + t_r * u_i;
182 t_r = u_r;
183 u_r = u_r * w_r[L] - u_i * w_i[L];
184 u_i = u_i * w_r[L] + t_r * w_i[L];
187 for (i = 0; i < N; i += 2)
189 ip = i + 1;
190 t_r = x_r[i] + x_r[ip];
191 t_i = x_i[i] + x_i[ip];
192 x_r[ip] = x_r[i] - x_r[ip];
193 x_i[ip] = x_i[i] - x_i[ip];
194 x_r[i] = t_r;
195 x_i[i] = t_i;
196 energy[i] = x_r[i] * x_r[i] + x_i[i] * x_i[i];
198 for (i = 0; i < FFT_SIZE; i++)
199 if (i < rev[i])
201 t_r = energy[i];
202 energy[i] = energy[rev[i]];
203 energy[rev[i]] = t_r;
205 for (i = 0; i < HAN_SIZE; i++)
206 { /* calculate power density spectrum */
207 if (energy[i] < 1E-20)
208 energy[i] = 1E-20;
209 power[i].x = 10 * log10 (energy[i]) + POWERNORM;
210 power[i].next = STOP;
211 power[i].type = FALSE;
213 mem_free ((void **) &x_r);
214 mem_free ((void **) &x_i);
215 mem_free ((void **) &energy);
218 /****************************************************************
220 * Window the incoming audio signal.
222 ****************************************************************/
224 void
225 hann_win (double sample[FFT_SIZE])
227 /* this function calculates a Hann window for PCM (input) samples for a 1024-pt. FFT */
228 register int i;
229 register double sqrt_8_over_3;
230 static int init = 0;
231 static double *window;
233 if (!init)
234 { /* calculate window function for the Fourier transform */
235 window = (double *) mem_alloc (sizeof (DFFT), "window");
236 sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
237 for (i = 0; i < FFT_SIZE; i++)
239 /* Hann window formula */
240 window[i] =
241 sqrt_8_over_3 * 0.5 * (1 - cos (2.0 * PI * i / (FFT_SIZE))) / FFT_SIZE;
243 init = 1;
245 for (i = 0; i < FFT_SIZE; i++)
246 sample[i] *= window[i];
249 /*******************************************************************
251 * This function finds the maximum spectral component in each
252 * subband and return them to the encoder for time-domain threshold
253 * determination.
255 *******************************************************************/
256 #ifndef LONDON
257 void
258 pick_max (mask power[HAN_SIZE], double spike[SBLIMIT])
260 double max;
261 int i, j;
263 for (i = 0; i < HAN_SIZE; spike[i >> 4] = max, i += 16) /* calculate the */
264 for (j = 0, max = DBMIN; j < 16; j++) /* maximum spectral */
265 max = (max > power[i + j].x) ? max : power[i + j].x; /* component in each */
266 } /* subband from bound */
268 /* 4-16 */
269 #else
270 void
271 pick_max (mask power[HAN_SIZE], double spike[SBLIMIT])
273 double sum;
274 int i, j;
276 for (i = 0; i < HAN_SIZE; spike[i >> 4] = 10.0 * log10 (sum), i += 16)
277 /* calculate the */
278 for (j = 0, sum = pow (10.0, 0.1 * DBMIN); j < 16; j++) /* sum of spectral */
279 sum += pow (10.0, 0.1 * power[i + j].x); /* component in each */
280 } /* subband from bound */
282 /* 4-16 */
283 #endif
285 /****************************************************************
287 * This function labels the tonal component in the power
288 * spectrum.
290 ****************************************************************/
292 void
293 tonal_label (mask power[HAN_SIZE], int *tone)
294 /* this function extracts (tonal) sinusoidals from the spectrum */
296 int i, j, last = LAST, first, run, last_but_one = LAST; /* dpwe */
297 double max;
299 *tone = LAST;
300 for (i = 2; i < HAN_SIZE - 12; i++)
302 if (power[i].x > power[i - 1].x && power[i].x >= power[i + 1].x)
304 power[i].type = TONE;
305 power[i].next = LAST;
306 if (last != LAST)
307 power[last].next = i;
308 else
309 first = *tone = i;
310 last = i;
313 last = LAST;
314 first = *tone;
315 *tone = LAST;
316 while (first != LAST)
317 { /* the conditions for the tonal */
318 if (first < 3 || first > 500)
319 run = 0; /* otherwise k+/-j will be out of bounds */
320 else if (first < 63)
321 run = 2; /* components in layer II, which */
322 else if (first < 127)
323 run = 3; /* are the boundaries for calc. */
324 else if (first < 255)
325 run = 6; /* the tonal components */
326 else
327 run = 12;
328 max = power[first].x - 7; /* after calculation of tonal */
329 for (j = 2; j <= run; j++) /* components, set to local max */
330 if (max < power[first - j].x || max < power[first + j].x)
332 power[first].type = FALSE;
333 break;
335 if (power[first].type == TONE)
336 { /* extract tonal components */
337 int help = first;
338 if (*tone == LAST)
339 *tone = first;
340 while ((power[help].next != LAST)
341 && (power[help].next - first) <= run)
342 help = power[help].next;
343 help = power[help].next;
344 power[first].next = help;
345 if ((first - last) <= run)
347 if (last_but_one != LAST)
348 power[last_but_one].next = first;
350 if (first > 1 && first < 500)
351 { /* calculate the sum of the */
352 double tmp; /* powers of the components */
353 tmp = add_db (power[first - 1].x, power[first + 1].x);
354 power[first].x = add_db (power[first].x, tmp);
356 for (j = 1; j <= run; j++)
358 power[first - j].x = power[first + j].x = DBMIN;
359 power[first - j].next = power[first + j].next = STOP;
360 power[first - j].type = power[first + j].type = FALSE;
362 last_but_one = last;
363 last = first;
364 first = power[first].next;
366 else
368 int ll;
369 if (last == LAST); /* *tone = power[first].next; dpwe */
370 else
371 power[last].next = power[first].next;
372 ll = first;
373 first = power[first].next;
374 power[ll].next = STOP;
379 /****************************************************************
381 * This function groups all the remaining non-tonal
382 * spectral lines into critical band where they are replaced by
383 * one single line.
385 ****************************************************************/
387 void
388 noise_label (mask * power, int *noise, g_thres * ltg)
390 int i, j, centre, last = LAST;
391 double index, weight, sum;
392 /* calculate the remaining spectral */
393 for (i = 0; i < crit_band - 1; i++)
394 { /* lines for non-tonal components */
395 for (j = cbound[i], weight = 0.0, sum = DBMIN; j < cbound[i + 1]; j++)
397 if (power[j].type != TONE)
399 if (power[j].x != DBMIN)
401 sum = add_db (power[j].x, sum);
402 /* the line below and others under the "MAKE_SENSE" condition are an alternate
403 interpretation of "geometric mean". This approach may make more sense but
404 it has not been tested with hardware. */
405 #ifdef MAKE_SENSE
406 /* weight += pow(10.0, power[j].x/10.0) * (ltg[power[j].map].bark-i);
407 bad code [SS] 21-1-93
409 weight += pow (10.0, power[j].x / 10.0) * (double) (j - cbound[i]) / (double) (cbound[i + 1] - cbound[i]); /* correction */
410 #endif
411 power[j].x = DBMIN;
413 } /* check to see if the spectral line is low dB, and if */
414 } /* so replace the center of the critical band, which is */
415 /* the center freq. of the noise component */
417 #ifdef MAKE_SENSE
418 if (sum <= DBMIN)
419 centre = (cbound[i + 1] + cbound[i]) / 2;
420 else
422 index = weight / pow (10.0, sum / 10.0);
423 centre =
424 cbound[i] + (int) (index * (double) (cbound[i + 1] - cbound[i]));
426 #else
427 index =
428 (double) (((double) cbound[i]) * ((double) (cbound[i + 1] - 1)));
429 centre = (int) (pow (index, 0.5) + 0.5);
430 #endif
432 /* locate next non-tonal component until finished; */
433 /* add to list of non-tonal components */
434 #ifdef MI_OPTION
435 /* Masahiro Iwadare's fix for infinite looping problem? */
436 if (power[centre].type == TONE)
438 if (power[centre + 1].type == TONE)
439 centre++;
440 else
441 centre--;
443 #else
444 /* Mike Li's fix for infinite looping problem */
445 if (power[centre].type == FALSE)
446 centre++;
448 if (power[centre].type == NOISE)
450 if (power[centre].x >= ltg[power[i].map].hear)
452 if (sum >= ltg[power[i].map].hear)
453 sum = add_db (power[j].x, sum);
454 else
455 sum = power[centre].x;
458 #endif
459 if (last == LAST)
460 *noise = centre;
461 else
463 power[centre].next = LAST;
464 power[last].next = centre;
466 power[centre].x = sum;
467 power[centre].type = NOISE;
468 last = centre;
472 /****************************************************************
474 * This function reduces the number of noise and tonal
475 * component for further threshold analysis.
477 ****************************************************************/
479 void
480 subsampling (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise)
482 int i, old;
484 i = *tone;
485 old = STOP; /* calculate tonal components for */
486 while (i != LAST)
487 { /* reduction of spectral lines */
488 if (power[i].x < ltg[power[i].map].hear)
490 power[i].type = FALSE;
491 power[i].x = DBMIN;
492 if (old == STOP)
493 *tone = power[i].next;
494 else
495 power[old].next = power[i].next;
497 else
498 old = i;
499 i = power[i].next;
501 i = *noise;
502 old = STOP; /* calculate non-tonal components for */
503 while (i != LAST)
504 { /* reduction of spectral lines */
505 if (power[i].x < ltg[power[i].map].hear)
507 power[i].type = FALSE;
508 power[i].x = DBMIN;
509 if (old == STOP)
510 *noise = power[i].next;
511 else
512 power[old].next = power[i].next;
514 else
515 old = i;
516 i = power[i].next;
518 i = *tone;
519 old = STOP;
520 while (i != LAST)
521 { /* if more than one */
522 if (power[i].next == LAST)
523 break; /* tonal component */
524 if (ltg[power[power[i].next].map].bark - /* is less than .5 */
525 ltg[power[i].map].bark < 0.5)
526 { /* bark, take the */
527 if (power[power[i].next].x > power[i].x)
528 { /* maximum */
529 if (old == STOP)
530 *tone = power[i].next;
531 else
532 power[old].next = power[i].next;
533 power[i].type = FALSE;
534 power[i].x = DBMIN;
535 i = power[i].next;
537 else
539 power[power[i].next].type = FALSE;
540 power[power[i].next].x = DBMIN;
541 power[i].next = power[power[i].next].next;
542 old = i;
545 else
547 old = i;
548 i = power[i].next;
553 /****************************************************************
555 * This function calculates the individual threshold and
556 * sum with the quiet threshold to find the global threshold.
558 ****************************************************************/
560 void
561 threshold (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise,
562 int bit_rate)
564 int k, t;
565 double dz, tmps, vf = 0.0;
567 for (k = 1; k < sub_size; k++)
569 ltg[k].x = DBMIN;
570 t = *tone; /* calculate individual masking threshold for */
571 while (t != LAST)
572 { /* components in order to find the global */
573 if (ltg[k].bark - ltg[power[t].map].bark >= -3.0 && /*threshold (LTG) */
574 ltg[k].bark - ltg[power[t].map].bark < 8.0)
576 dz = ltg[k].bark - ltg[power[t].map].bark; /* distance of bark value */
577 tmps =
578 -1.525 - 0.275 * ltg[power[t].map].bark - 4.5 + power[t].x;
579 /* masking function for lower & upper slopes */
580 if (-3 <= dz && dz < -1)
581 vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
582 else if (-1 <= dz && dz < 0)
583 vf = (0.4 * power[t].x + 6) * dz;
584 else if (0 <= dz && dz < 1)
585 vf = (-17 * dz);
586 else if (1 <= dz && dz < 8)
587 vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
588 tmps += vf;
589 ltg[k].x = add_db (ltg[k].x, tmps);
591 t = power[t].next;
594 t = *noise; /* calculate individual masking threshold */
595 while (t != LAST)
596 { /* for non-tonal components to find LTG */
597 if (ltg[k].bark - ltg[power[t].map].bark >= -3.0 &&
598 ltg[k].bark - ltg[power[t].map].bark < 8.0)
600 dz = ltg[k].bark - ltg[power[t].map].bark; /* distance of bark value */
601 tmps =
602 -1.525 - 0.175 * ltg[power[t].map].bark - 0.5 + power[t].x;
603 /* masking function for lower & upper slopes */
604 if (-3 <= dz && dz < -1)
605 vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
606 else if (-1 <= dz && dz < 0)
607 vf = (0.4 * power[t].x + 6) * dz;
608 else if (0 <= dz && dz < 1)
609 vf = (-17 * dz);
610 else if (1 <= dz && dz < 8)
611 vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
612 tmps += vf;
613 ltg[k].x = add_db (ltg[k].x, tmps);
615 t = power[t].next;
617 if (bit_rate < 96)
618 ltg[k].x = add_db (ltg[k].hear, ltg[k].x);
619 else
620 ltg[k].x = add_db (ltg[k].hear - 12.0, ltg[k].x);
624 /****************************************************************
626 * This function finds the minimum masking threshold and
627 * return the value to the encoder.
629 ****************************************************************/
631 void
632 minimum_mask (g_thres * ltg, double ltmin[SBLIMIT], int sblimit)
634 double min;
635 int i, j;
637 j = 1;
638 for (i = 0; i < sblimit; i++)
639 if (j >= sub_size - 1) /* check subband limit, and */
640 ltmin[i] = ltg[sub_size - 1].hear; /* calculate the minimum masking */
641 else
642 { /* level of LTMIN for each subband */
643 min = ltg[j].x;
644 while (ltg[j].line >> 4 == i && j < sub_size)
646 if (min > ltg[j].x)
647 min = ltg[j].x;
648 j++;
650 ltmin[i] = min;
654 /*****************************************************************
656 * This procedure is called in musicin to pick out the
657 * smaller of the scalefactor or threshold.
659 *****************************************************************/
661 void
662 smr (double ltmin[SBLIMIT], double spike[SBLIMIT],
663 double scale[SBLIMIT], int sblimit)
665 int i;
666 double max;
668 for (i = 0; i < sblimit; i++)
669 { /* determine the signal */
670 max = 20 * log10 (scale[i] * 32768) - 10; /* level for each subband */
671 if (spike[i] > max)
672 max = spike[i]; /* for the maximum scale */
673 max -= ltmin[i]; /* factors */
674 ltmin[i] = max;
678 /****************************************************************
680 * This procedure calls all the necessary functions to
681 * complete the psychoacoustic analysis.
683 ****************************************************************/
685 void
686 psycho_one (short buffer[2][1152], double scale[2][SBLIMIT],
687 double ltmin[2][SBLIMIT], frame_params * fr_ps)
689 layer *info = fr_ps->header;
690 int stereo = fr_ps->stereo;
691 int sblimit = fr_ps->sblimit;
692 int k, i, tone = 0, noise = 0;
693 static char init = 0;
694 static int off[2] = { 256, 256 };
695 double *sample;
696 DSBL *spike;
697 static D1408 *fft_buf;
698 static mask_ptr power;
699 static g_ptr ltg;
701 sample = (double *) mem_alloc (sizeof (DFFT), "sample");
702 spike = (DSBL *) mem_alloc (sizeof (D2SBL), "spike");
703 /* call functions for critical boundaries, freq. */
704 if (!init)
705 { /* bands, bark values, and mapping */
706 fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf");
707 power = (mask_ptr) mem_alloc (sizeof (mask) * HAN_SIZE, "power");
708 if (info->version == MPEG_AUDIO_ID)
710 read_cbound (info->sampling_frequency);
711 read_freq_band (&ltg, info->sampling_frequency);
713 else
715 read_cbound (info->sampling_frequency + 4);
716 read_freq_band (&ltg, info->sampling_frequency + 4);
718 make_map (power, ltg);
719 for (i = 0; i < 1408; i++)
720 fft_buf[0][i] = fft_buf[1][i] = 0;
721 init = 1;
723 for (k = 0; k < stereo; k++)
724 { /* check pcm input for 3 blocks of 384 samples */
725 for (i = 0; i < 1152; i++)
726 fft_buf[k][(i + off[k]) % 1408] = (double) buffer[k][i] / SCALE;
727 for (i = 0; i < FFT_SIZE; i++)
728 sample[i] = fft_buf[k][(i + 1216 + off[k]) % 1408];
729 off[k] += 1152;
730 off[k] %= 1408;
731 /* call functions for windowing PCM samples, */
732 hann_win (sample); /* location of spectral components in each */
733 for (i = 0; i < HAN_SIZE; i++)
734 power[i].x = DBMIN; /*subband with labeling */
735 f_f_t (sample, power); /*locate remaining non- */
736 pick_max (power, &spike[k][0]); /*tonal sinusoidals, */
737 tonal_label (power, &tone); /*reduce noise & tonal */
738 noise_label (power, &noise, ltg); /*components, find */
739 subsampling (power, ltg, &tone, &noise); /*global & minimal */
740 threshold (power, ltg, &tone, &noise, /*threshold, and sgnl- */
741 bitrate[info->version][info->bitrate_index] / stereo); /*to-mask ratio */
742 minimum_mask (ltg, &ltmin[k][0], sblimit);
743 smr (&ltmin[k][0], &spike[k][0], &scale[k][0], sblimit);
745 mem_free ((void **) &sample);
746 mem_free ((void **) &spike);