r1009: Move the dependencies to newer package names
[cinelerra_cv/mob.git] / toolame-02l / psycho_1.c
blobd3d673f6c3f3055e389855abb6c101583f053ea9
1 #include <stdio.h>
2 #include <math.h>
3 #include "common.h"
4 #include "encoder.h"
5 #include "mem.h"
6 #include "fft.h"
7 #include "psycho_1.h"
8 #include "psycho_1_priv.h"
10 #define DBTAB 1000
11 double dbtable[DBTAB];
13 /**********************************************************************
15 This module implements the psychoacoustic model I for the
16 MPEG encoder layer II. It uses simplified tonal and noise masking
17 threshold analysis to generate SMR for the encoder bit allocation
18 routine.
20 **********************************************************************/
22 void psycho_1 (short buffer[2][1152], double scale[2][SBLIMIT],
23 double ltmin[2][SBLIMIT], frame_info * frame)
25 frame_header *header = frame->header;
26 int nch = frame->nch;
27 int sblimit = frame->sblimit;
28 int k, i, tone = 0, noise = 0;
29 static char init = 0;
30 static int off[2] = { 256, 256 };
31 double sample[FFT_SIZE];
32 double spike[2][SBLIMIT];
33 static D1408 *fft_buf;
34 static mask_ptr power;
35 static g_ptr ltg;
36 FLOAT energy[FFT_SIZE];
38 /* call functions for critical boundaries, freq. */
39 if (!init) { /* bands, bark values, and mapping */
40 fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf");
41 power = (mask_ptr) mem_alloc (sizeof (mask) * HAN_SIZE, "power");
42 if (header->version == MPEG_AUDIO_ID) {
43 psycho_1_read_cbound (header->lay, header->sampling_frequency);
44 psycho_1_read_freq_band (&ltg, header->lay, header->sampling_frequency);
45 } else {
46 psycho_1_read_cbound (header->lay, header->sampling_frequency + 4);
47 psycho_1_read_freq_band (&ltg, header->lay, header->sampling_frequency + 4);
49 psycho_1_make_map (power, ltg);
50 for (i = 0; i < 1408; i++)
51 fft_buf[0][i] = fft_buf[1][i] = 0;
53 psycho_1_init_add_db (); /* create the add_db table */
55 init = 1;
57 for (k = 0; k < nch; k++) {
58 /* check pcm input for 3 blocks of 384 samples */
59 /* sami's speedup, added in 02j
60 saves about 4% overall during an encode */
61 int ok = off[k] % 1408;
62 for (i = 0; i < 1152; i++) {
63 fft_buf[k][ok++] = (double) buffer[k][i] / SCALE;
64 if (ok >= 1408)
65 ok = 0;
67 ok = (off[k] + 1216) % 1408;
68 for (i = 0; i < FFT_SIZE; i++) {
69 sample[i] = fft_buf[k][ok++];
70 if (ok >= 1408)
71 ok = 0;
73 off[k] += 1152;
74 off[k] %= 1408;
76 psycho_1_hann_fft_pickmax (sample, power, &spike[k][0], energy);
77 psycho_1_tonal_label (power, &tone);
78 psycho_1_noise_label (power, &noise, ltg, energy);
79 //psycho_1_dump(power, &tone, &noise) ;
80 psycho_1_subsampling (power, ltg, &tone, &noise);
81 psycho_1_threshold (power, ltg, &tone, &noise,
82 bitrate[header->version][header->bitrate_index] / nch);
83 psycho_1_minimum_mask (ltg, &ltmin[k][0], sblimit);
84 psycho_1_smr (&ltmin[k][0], &spike[k][0], &scale[k][0], sblimit);
90 int crit_band;
91 int *cbound;
92 int sub_size;
94 void psycho_1_read_cbound (int lay, int freq)
95 /* this function reads in critical band boundaries */
98 #include "critband.h"
99 //static const int FirstCriticalBand[7][27] = {...
101 int i, k;
103 if ((lay < 1) || (lay > 2)) {
104 printf ("Internal error (read_cbound())\n");
105 return;
107 if ((freq < 0) || (freq > 6) || (freq == 3)) {
108 printf ("Internal error (read_cbound())\n");
109 return;
112 crit_band = SecondCriticalBand[freq][0];
113 cbound = (int *) mem_alloc (sizeof (int) * crit_band, "cbound");
114 for (i = 0; i < crit_band; i++) {
115 k = SecondCriticalBand[freq][i + 1];
116 if (k != 0) {
117 cbound[i] = k;
118 } else {
119 printf ("Internal error (read_cbound())\n");
120 return;
125 void psycho_1_read_freq_band (ltg, lay, freq) /* this function reads in */
126 int lay, freq; /* frequency bands and bark */
127 g_ptr *ltg; /* values */
130 #include "freqtable.h"
132 int i, k;
134 if ((freq < 0) || (freq > 6) || (freq == 3)) {
135 printf ("Internal error (read_freq_band())\n");
136 return;
139 /* read input for freq. subbands */
141 sub_size = SecondFreqEntries[freq] + 1;
142 *ltg = (g_ptr) mem_alloc (sizeof (g_thres) * sub_size, "ltg");
143 (*ltg)[0].line = 0; /* initialize global masking threshold */
144 (*ltg)[0].bark = 0.0;
145 (*ltg)[0].hear = 0.0;
146 for (i = 1; i < sub_size; i++) {
147 k = SecondFreqSubband[freq][i - 1].line;
148 if (k != 0) {
149 (*ltg)[i].line = k;
150 (*ltg)[i].bark = SecondFreqSubband[freq][i - 1].bark;
151 (*ltg)[i].hear = SecondFreqSubband[freq][i - 1].hear;
152 } else {
153 printf ("Internal error (read_freq_band())\n");
154 return;
160 void psycho_1_make_map (mask power[HAN_SIZE], g_thres * ltg)
161 /* this function calculates the global masking threshold */
163 int i, j;
165 for (i = 1; i < sub_size; i++)
166 for (j = ltg[i - 1].line; j <= ltg[i].line; j++)
167 power[j].map = i;
170 void psycho_1_init_add_db (void)
172 int i;
173 double x;
174 for (i = 0; i < DBTAB; i++) {
175 x = (double) i / 10.0;
176 dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x;
180 INLINE double add_db (double a, double b)
182 /* MFC - if the difference between a and b is large (>99), then just return the
183 largest one. (about 10% of the time)
184 - For differences between 0 and 99, return the largest value, but add
185 in a pre-calculated difference value.
186 - the value 99 was chosen arbitarily.
187 - maximum (a-b) i've seen is 572 */
188 FLOAT fdiff;
189 int idiff;
190 fdiff = (10.0 * (a - b));
192 if (fdiff > 990.0) {
193 return a;
195 if (fdiff < -990.0) {
196 return (b);
199 idiff = (int) fdiff;
200 if (idiff >= 0) {
201 return (a + dbtable[idiff]);
204 return (b + dbtable[-idiff]);
207 /****************************************************************
208 * Window the samples then,
209 * Fast Fourier transform of the input samples.
211 * ( call the FHT-based fft() in fft.c )
214 ****************************************************************/
215 void psycho_1_hann_fft_pickmax (double sample[FFT_SIZE], mask power[HAN_SIZE],
216 double spike[SBLIMIT], FLOAT energy[FFT_SIZE])
218 FLOAT x_real[FFT_SIZE];
219 register int i, j;
220 register double sqrt_8_over_3;
221 static int init = 0;
222 static double *window;
223 double sum;
225 if (!init) { /* calculate window function for the Fourier transform */
226 window = (double *) mem_alloc (sizeof (DFFT), "window");
227 sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
228 for (i = 0; i < FFT_SIZE; i++) {
229 /* Hann window formula */
230 window[i] =
231 sqrt_8_over_3 * 0.5 * (1 -
232 cos (2.0 * PI * i / (FFT_SIZE))) / FFT_SIZE;
234 init = 1;
236 for (i = 0; i < FFT_SIZE; i++)
237 x_real[i] = (FLOAT) (sample[i] * window[i]);
239 psycho_1_fft (x_real, energy, FFT_SIZE);
241 for (i = 0; i < HAN_SIZE; i++) { /* calculate power density spectrum */
242 if (energy[i] < 1E-20)
243 power[i].x = -200.0 + POWERNORM;
244 else
245 power[i].x = 10 * log10 (energy[i]) + POWERNORM;
246 power[i].next = STOP;
247 power[i].type = FALSE;
250 /* Calculate the sum of spectral component in each subband from bound 4-16 */
252 #define CF 1073741824 /* pow(10, 0.1*POWERNORM) */
253 #define DBM 1E-20 /* pow(10.0, 0.1*DBMIN */
254 for (i = 0; i < HAN_SIZE; spike[i >> 4] = 10.0 * log10 (sum), i += 16) {
255 for (j = 0, sum = DBM; j < 16; j++)
256 sum += CF * energy[i + j];
260 /****************************************************************
262 * This function labels the tonal component in the power
263 * spectrum.
265 ****************************************************************/
267 void psycho_1_tonal_label (mask power[HAN_SIZE], int *tone)
268 /* this function extracts (tonal) sinusoidals from the spectrum */
270 int i, j, last = LAST, first, run, last_but_one = LAST; /* dpwe */
271 double max;
273 *tone = LAST;
274 for (i = 2; i < HAN_SIZE - 12; i++) {
275 if (power[i].x > power[i - 1].x && power[i].x >= power[i + 1].x) {
276 power[i].type = TONE;
277 power[i].next = LAST;
278 if (last != LAST)
279 power[last].next = i;
280 else
281 first = *tone = i;
282 last = i;
285 last = LAST;
286 first = *tone;
287 *tone = LAST;
288 while ((first != LAST) && (first != STOP)) { /* the conditions for the tonal */
289 if (first < 3 || first > 500)
290 run = 0; /* otherwise k+/-j will be out of bounds */
291 else if (first < 63)
292 run = 2; /* components in layer II, which */
293 else if (first < 127)
294 run = 3; /* are the boundaries for calc. */
295 else if (first < 255)
296 run = 6; /* the tonal components */
297 else
298 run = 12;
299 max = power[first].x - 7; /* after calculation of tonal */
300 for (j = 2; j <= run; j++) /* components, set to local max */
301 if (max < power[first - j].x || max < power[first + j].x) {
302 power[first].type = FALSE;
303 break;
305 if (power[first].type == TONE) { /* extract tonal components */
306 int help = first;
307 if (*tone == LAST)
308 *tone = first;
309 while ((power[help].next != LAST) && (power[help].next - first) <= run)
310 help = power[help].next;
311 help = power[help].next;
312 power[first].next = help;
313 if ((first - last) <= run) {
314 if (last_but_one != LAST)
315 power[last_but_one].next = first;
317 if (first > 1 && first < 500) { /* calculate the sum of the */
318 double tmp; /* powers of the components */
319 tmp = add_db (power[first - 1].x, power[first + 1].x);
320 power[first].x = add_db (power[first].x, tmp);
322 for (j = 1; j <= run; j++) {
323 power[first - j].x = power[first + j].x = DBMIN;
324 power[first - j].next = power[first + j].next = STOP;
325 power[first - j].type = power[first + j].type = FALSE;
327 last_but_one = last;
328 last = first;
329 first = power[first].next;
330 } else {
331 int ll;
332 if (last == LAST); /* *tone = power[first].next; dpwe */
333 else
334 power[last].next = power[first].next;
335 ll = first;
336 first = power[first].next;
337 power[ll].next = STOP;
342 /****************************************************************
344 * This function groups all the remaining non-tonal
345 * spectral lines into critical band where they are replaced by
346 * one single line.
348 ****************************************************************/
350 void psycho_1_noise_label (mask * power, int *noise, g_thres * ltg,
351 FLOAT energy[FFT_SIZE])
353 int i, j, centre, last = LAST;
354 double index, weight, sum;
355 /* calculate the remaining spectral */
356 for (i = 0; i < crit_band - 1; i++) { /* lines for non-tonal components */
357 for (j = cbound[i], weight = 0.0, sum = DBMIN; j < cbound[i + 1]; j++) {
358 if (power[j].type != TONE) {
359 if (power[j].x != DBMIN) {
360 sum = add_db (power[j].x, sum);
361 /* Weight is used in finding the geometric mean of the noise energy within a subband */
362 weight += CF * energy[j] * (double) (j - cbound[i]) / (double) (cbound[i + 1] - cbound[i]); /* correction */
363 power[j].x = DBMIN;
365 } /* check to see if the spectral line is low dB, and if */
366 } /* so replace the center of the critical band, which is */
367 /* the center freq. of the noise component */
369 if (sum <= DBMIN)
370 centre = (cbound[i + 1] + cbound[i]) / 2;
371 else {
372 /* fprintf(stderr, "%i [%f %f] -", count++,weight/pow(10.0,0.1*sum), weight*pow(10.0,-0.1*sum)); */
373 index = weight * pow (10.0, -0.1 * sum);
374 centre =
375 cbound[i] + (int) (index * (double) (cbound[i + 1] - cbound[i]));
379 /* locate next non-tonal component until finished; */
380 /* add to list of non-tonal components */
382 /* Masahiro Iwadare's fix for infinite looping problem? */
383 if (power[centre].type == TONE) {
384 if (power[centre + 1].type == TONE) {
385 centre++;
386 } else
387 centre--;
390 if (last == LAST)
391 *noise = centre;
392 else {
393 power[centre].next = LAST;
394 power[last].next = centre;
396 power[centre].x = sum;
397 power[centre].type = NOISE;
398 last = centre;
402 /****************************************************************
404 * This function reduces the number of noise and tonal
405 * component for further threshold analysis.
407 ****************************************************************/
409 void psycho_1_subsampling (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise)
411 int i, old;
413 i = *tone;
414 old = STOP; /* calculate tonal components for */
416 while ((i != LAST) && (i != STOP))
417 { /* reduction of spectral lines */
418 if (power[i].x < ltg[power[i].map].hear) {
419 power[i].type = FALSE;
420 power[i].x = DBMIN;
421 if (old == STOP)
422 *tone = power[i].next;
423 else
424 power[old].next = power[i].next;
425 } else
426 old = i;
427 i = power[i].next;
429 i = *noise;
430 old = STOP; /* calculate non-tonal components for */
431 while ((i != LAST) && (i != STOP)) { /* reduction of spectral lines */
432 if (power[i].x < ltg[power[i].map].hear) {
433 power[i].type = FALSE;
434 power[i].x = DBMIN;
435 if (old == STOP)
436 *noise = power[i].next;
437 else
438 power[old].next = power[i].next;
439 } else
440 old = i;
441 i = power[i].next;
443 i = *tone;
444 old = STOP;
445 while ((i != LAST) && (i != STOP))
446 { /* if more than one */
447 if (power[i].next == LAST)
448 break; /* tonal component */
449 if (ltg[power[power[i].next].map].bark - /* is less than .5 */
450 ltg[power[i].map].bark < 0.5) { /* bark, take the */
451 if (power[power[i].next].x > power[i].x) { /* maximum */
452 if (old == STOP)
453 *tone = power[i].next;
454 else
455 power[old].next = power[i].next;
456 power[i].type = FALSE;
457 power[i].x = DBMIN;
458 i = power[i].next;
459 } else {
460 power[power[i].next].type = FALSE;
461 power[power[i].next].x = DBMIN;
462 power[i].next = power[power[i].next].next;
463 old = i;
465 } else {
466 old = i;
467 i = power[i].next;
472 /****************************************************************
474 * This function calculates the individual threshold and
475 * sum with the quiet threshold to find the global threshold.
477 ****************************************************************/
479 /* mainly just changed the way range checking was done MFC Nov 1999 */
480 void psycho_1_threshold (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise,
481 int bit_rate)
483 int k, t;
484 double dz, tmps, vf;
486 for (k = 1; k < sub_size; k++) {
487 ltg[k].x = DBMIN;
488 t = *tone; /* calculate individual masking threshold for */
489 while ((t != LAST) && (t != STOP))
490 { /* components in order to find the global */
491 dz = ltg[k].bark - ltg[power[t].map].bark; /* distance of bark value */
492 if (dz >= -3.0 && dz < 8.0) {
493 tmps = -1.525 - 0.275 * ltg[power[t].map].bark - 4.5 + power[t].x;
494 /* masking function for lower & upper slopes */
495 if (dz < -1)
496 vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
497 else if (dz < 0)
498 vf = (0.4 * power[t].x + 6) * dz;
499 else if (dz < 1)
500 vf = (-17 * dz);
501 else
502 vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
503 ltg[k].x = add_db (ltg[k].x, tmps + vf);
505 t = power[t].next;
508 t = *noise; /* calculate individual masking threshold */
509 while ((t != LAST) && (t != STOP)) { /* for non-tonal components to find LTG */
510 dz = ltg[k].bark - ltg[power[t].map].bark; /* distance of bark value */
511 if (dz >= -3.0 && dz < 8.0) {
512 tmps = -1.525 - 0.175 * ltg[power[t].map].bark - 0.5 + power[t].x;
513 /* masking function for lower & upper slopes */
514 if (dz < -1)
515 vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
516 else if (dz < 0)
517 vf = (0.4 * power[t].x + 6) * dz;
518 else if (dz < 1)
519 vf = (-17 * dz);
520 else
521 vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
522 ltg[k].x = add_db (ltg[k].x, tmps + vf);
524 t = power[t].next;
526 if (bit_rate < 96)
527 ltg[k].x = add_db (ltg[k].hear, ltg[k].x);
528 else
529 ltg[k].x = add_db (ltg[k].hear - 12.0, ltg[k].x);
534 /****************************************************************
536 * This function finds the minimum masking threshold and
537 * return the value to the encoder.
539 ****************************************************************/
541 void psycho_1_minimum_mask (g_thres * ltg, double ltmin[SBLIMIT], int sblimit)
543 double min;
544 int i, j;
546 j = 1;
547 for (i = 0; i < sblimit; i++)
548 if (j >= sub_size - 1) /* check subband limit, and */
549 ltmin[i] = ltg[sub_size - 1].hear; /* calculate the minimum masking */
550 else { /* level of LTMIN for each subband */
551 min = ltg[j].x;
552 while (ltg[j].line >> 4 == i && j < sub_size) {
553 if (min > ltg[j].x)
554 min = ltg[j].x;
555 j++;
557 ltmin[i] = min;
561 /*****************************************************************
563 * This procedure is called in musicin to pick out the
564 * smaller of the scalefactor or threshold.
566 *****************************************************************/
568 void psycho_1_smr (double ltmin[SBLIMIT], double spike[SBLIMIT], double scale[SBLIMIT],
569 int sblimit)
571 int i;
572 double max;
574 for (i = 0; i < sblimit; i++) { /* determine the signal */
575 max = 20 * log10 (scale[i] * 32768) - 10; /* level for each subband */
576 if (spike[i] > max)
577 max = spike[i]; /* for the maximum scale */
578 max -= ltmin[i]; /* factors */
579 ltmin[i] = max;
583 void psycho_1_dump(mask power[HAN_SIZE], int *tone, int *noise) {
584 int t;
586 fprintf(stdout,"1 Ton: ");
587 t=*tone;
588 while (t!=LAST && t!=STOP) {
589 fprintf(stdout,"[%i] %3.0f ",t, power[t].x);
590 t = power[t].next;
592 fprintf(stdout,"\n");
594 fprintf(stdout,"1 Nos: ");
595 t=*noise;
596 while (t!=LAST && t!=STOP) {
597 fprintf(stdout,"[%i] %3.0f ",t, power[t].x);
598 t = power[t].next;
600 fprintf(stdout,"\n");