6 #define LONDON /* enable "LONDON" modification */
7 #define MAKE_SENSE /* enable "MAKE_SENSE" modification */
8 #define MI_OPTION /* enable "MI_OPTION" modification */
9 /**********************************************************************
11 * This module implements the psychoacoustic model I for the
12 * MPEG encoder layer II. It uses simplified tonal and noise masking
13 * threshold analysis to generate SMR for the encoder bit allocation
16 **********************************************************************/
23 add_db (double a
, double b
)
25 a
= pow (10.0, a
/ 10.0);
26 b
= pow (10.0, b
/ 10.0);
27 return 10 * log10 (a
+ b
);
31 read_cbound (int freq
)
35 //static const int FirstCriticalBand[7][27] = {...
39 if ((freq
< 0) || (freq
> 6) || (freq
== 3))
41 fprintf (stderr
, "Internal error (read_cbound())\n");
46 crit_band
= CriticalBands
[freq
][0];
47 cbound
= (int *) mem_alloc (sizeof (int) * crit_band
, "cbound");
48 for (i
= 0; i
< crit_band
; i
++)
50 k
= CriticalBands
[freq
][i
+ 1];
57 fprintf (stderr
, "Internal error (read_cbound())\n");
66 read_freq_band (g_ptr
* ltg
, int freq
)
70 #include "freqtable.h"
73 if ((freq
< 0) || (freq
> 6) || (freq
== 3))
75 fprintf (stderr
, "Internal error (read_freq_band())\n");
80 sub_size
= SecondFreqEntries
[freq
] + 1;
81 *ltg
= (g_ptr
) mem_alloc (sizeof (g_thres
) * sub_size
, "ltg");
82 (*ltg
)[0].line
= 0; /* initialize global masking threshold */
85 for (i
= 1; i
< sub_size
; i
++)
87 k
= SecondFreqSubband
[freq
][i
- 1].line
;
91 (*ltg
)[i
].bark
= SecondFreqSubband
[freq
][i
- 1].bark
;
92 (*ltg
)[i
].hear
= SecondFreqSubband
[freq
][i
- 1].hear
;
96 printf ("Internal error (read_freq_band())\n");
105 make_map (mask power
[HAN_SIZE
], g_thres
* ltg
)
106 /* this function calculates the global masking threshold */
110 for (i
= 1; i
< sub_size
; i
++)
111 for (j
= ltg
[i
- 1].line
; j
<= ltg
[i
].line
; j
++)
115 /****************************************************************
117 * Fast Fourier transform of the input samples.
119 ****************************************************************/
120 /* FIXME: meld this with the FFT in subs.c */
122 f_f_t (double sample
[FFT_SIZE
], mask power
[HAN_SIZE
])
124 int i
, j
, k
, L
, l
= 0;
126 double t_r
, t_i
, u_r
, u_i
;
127 static int M
, MM1
, init
= 0, N
;
128 double *x_r
, *x_i
, *energy
;
130 static double *w_r
, *w_i
;
132 x_r
= (double *) mem_alloc (sizeof (DFFT
), "x_r");
133 x_i
= (double *) mem_alloc (sizeof (DFFT
), "x_i");
134 energy
= (double *) mem_alloc (sizeof (DFFT
), "energy");
135 for (i
= 0; i
< FFT_SIZE
; i
++)
136 x_r
[i
] = x_i
[i
] = energy
[i
] = 0;
139 rev
= (int *) mem_alloc (sizeof (IFFT
), "rev");
140 w_r
= (double *) mem_alloc (sizeof (D10
), "w_r");
141 w_i
= (double *) mem_alloc (sizeof (D10
), "w_i");
145 for (L
= 0; L
< M
; L
++)
149 w_r
[L
] = cos (PI
/ le1
);
150 w_i
[L
] = -sin (PI
/ le1
);
152 for (i
= 0; i
< FFT_SIZE
; rev
[i
] = l
, i
++)
153 for (j
= 0, l
= 0; j
< 10; j
++)
160 memcpy ((char *) x_r
, (char *) sample
, sizeof (double) * FFT_SIZE
);
161 for (L
= 0; L
< MM1
; L
++)
167 for (j
= 0; j
< le1
; j
++)
169 for (i
= j
; i
< N
; i
+= le
)
172 t_r
= x_r
[i
] + x_r
[ip
];
173 t_i
= x_i
[i
] + x_i
[ip
];
174 x_r
[ip
] = x_r
[i
] - x_r
[ip
];
175 x_i
[ip
] = x_i
[i
] - x_i
[ip
];
179 x_r
[ip
] = x_r
[ip
] * u_r
- x_i
[ip
] * u_i
;
180 x_i
[ip
] = x_i
[ip
] * u_r
+ t_r
* u_i
;
183 u_r
= u_r
* w_r
[L
] - u_i
* w_i
[L
];
184 u_i
= u_i
* w_r
[L
] + t_r
* w_i
[L
];
187 for (i
= 0; i
< N
; i
+= 2)
190 t_r
= x_r
[i
] + x_r
[ip
];
191 t_i
= x_i
[i
] + x_i
[ip
];
192 x_r
[ip
] = x_r
[i
] - x_r
[ip
];
193 x_i
[ip
] = x_i
[i
] - x_i
[ip
];
196 energy
[i
] = x_r
[i
] * x_r
[i
] + x_i
[i
] * x_i
[i
];
198 for (i
= 0; i
< FFT_SIZE
; i
++)
202 energy
[i
] = energy
[rev
[i
]];
203 energy
[rev
[i
]] = t_r
;
205 for (i
= 0; i
< HAN_SIZE
; i
++)
206 { /* calculate power density spectrum */
207 if (energy
[i
] < 1E-20)
209 power
[i
].x
= 10 * log10 (energy
[i
]) + POWERNORM
;
210 power
[i
].next
= STOP
;
211 power
[i
].type
= FALSE
;
213 mem_free ((void **) &x_r
);
214 mem_free ((void **) &x_i
);
215 mem_free ((void **) &energy
);
218 /****************************************************************
220 * Window the incoming audio signal.
222 ****************************************************************/
225 hann_win (double sample
[FFT_SIZE
])
227 /* this function calculates a Hann window for PCM (input) samples for a 1024-pt. FFT */
229 register double sqrt_8_over_3
;
231 static double *window
;
234 { /* calculate window function for the Fourier transform */
235 window
= (double *) mem_alloc (sizeof (DFFT
), "window");
236 sqrt_8_over_3
= pow (8.0 / 3.0, 0.5);
237 for (i
= 0; i
< FFT_SIZE
; i
++)
239 /* Hann window formula */
241 sqrt_8_over_3
* 0.5 * (1 - cos (2.0 * PI
* i
/ (FFT_SIZE
))) / FFT_SIZE
;
245 for (i
= 0; i
< FFT_SIZE
; i
++)
246 sample
[i
] *= window
[i
];
249 /*******************************************************************
251 * This function finds the maximum spectral component in each
252 * subband and return them to the encoder for time-domain threshold
255 *******************************************************************/
258 pick_max (mask power
[HAN_SIZE
], double spike
[SBLIMIT
])
263 for (i
= 0; i
< HAN_SIZE
; spike
[i
>> 4] = max
, i
+= 16) /* calculate the */
264 for (j
= 0, max
= DBMIN
; j
< 16; j
++) /* maximum spectral */
265 max
= (max
> power
[i
+ j
].x
) ? max
: power
[i
+ j
].x
; /* component in each */
266 } /* subband from bound */
271 pick_max (mask power
[HAN_SIZE
], double spike
[SBLIMIT
])
276 for (i
= 0; i
< HAN_SIZE
; spike
[i
>> 4] = 10.0 * log10 (sum
), i
+= 16)
278 for (j
= 0, sum
= pow (10.0, 0.1 * DBMIN
); j
< 16; j
++) /* sum of spectral */
279 sum
+= pow (10.0, 0.1 * power
[i
+ j
].x
); /* component in each */
280 } /* subband from bound */
285 /****************************************************************
287 * This function labels the tonal component in the power
290 ****************************************************************/
293 tonal_label (mask power
[HAN_SIZE
], int *tone
)
294 /* this function extracts (tonal) sinusoidals from the spectrum */
296 int i
, j
, last
= LAST
, first
, run
, last_but_one
= LAST
; /* dpwe */
300 for (i
= 2; i
< HAN_SIZE
- 12; i
++)
302 if (power
[i
].x
> power
[i
- 1].x
&& power
[i
].x
>= power
[i
+ 1].x
)
304 power
[i
].type
= TONE
;
305 power
[i
].next
= LAST
;
307 power
[last
].next
= i
;
316 while (first
!= LAST
)
317 { /* the conditions for the tonal */
318 if (first
< 3 || first
> 500)
319 run
= 0; /* otherwise k+/-j will be out of bounds */
321 run
= 2; /* components in layer II, which */
322 else if (first
< 127)
323 run
= 3; /* are the boundaries for calc. */
324 else if (first
< 255)
325 run
= 6; /* the tonal components */
328 max
= power
[first
].x
- 7; /* after calculation of tonal */
329 for (j
= 2; j
<= run
; j
++) /* components, set to local max */
330 if (max
< power
[first
- j
].x
|| max
< power
[first
+ j
].x
)
332 power
[first
].type
= FALSE
;
335 if (power
[first
].type
== TONE
)
336 { /* extract tonal components */
340 while ((power
[help
].next
!= LAST
)
341 && (power
[help
].next
- first
) <= run
)
342 help
= power
[help
].next
;
343 help
= power
[help
].next
;
344 power
[first
].next
= help
;
345 if ((first
- last
) <= run
)
347 if (last_but_one
!= LAST
)
348 power
[last_but_one
].next
= first
;
350 if (first
> 1 && first
< 500)
351 { /* calculate the sum of the */
352 double tmp
; /* powers of the components */
353 tmp
= add_db (power
[first
- 1].x
, power
[first
+ 1].x
);
354 power
[first
].x
= add_db (power
[first
].x
, tmp
);
356 for (j
= 1; j
<= run
; j
++)
358 power
[first
- j
].x
= power
[first
+ j
].x
= DBMIN
;
359 power
[first
- j
].next
= power
[first
+ j
].next
= STOP
;
360 power
[first
- j
].type
= power
[first
+ j
].type
= FALSE
;
364 first
= power
[first
].next
;
369 if (last
== LAST
); /* *tone = power[first].next; dpwe */
371 power
[last
].next
= power
[first
].next
;
373 first
= power
[first
].next
;
374 power
[ll
].next
= STOP
;
379 /****************************************************************
381 * This function groups all the remaining non-tonal
382 * spectral lines into critical band where they are replaced by
385 ****************************************************************/
388 noise_label (mask
* power
, int *noise
, g_thres
* ltg
)
390 int i
, j
, centre
, last
= LAST
;
391 double index
, weight
, sum
;
392 /* calculate the remaining spectral */
393 for (i
= 0; i
< crit_band
- 1; i
++)
394 { /* lines for non-tonal components */
395 for (j
= cbound
[i
], weight
= 0.0, sum
= DBMIN
; j
< cbound
[i
+ 1]; j
++)
397 if (power
[j
].type
!= TONE
)
399 if (power
[j
].x
!= DBMIN
)
401 sum
= add_db (power
[j
].x
, sum
);
402 /* the line below and others under the "MAKE_SENSE" condition are an alternate
403 interpretation of "geometric mean". This approach may make more sense but
404 it has not been tested with hardware. */
406 /* weight += pow(10.0, power[j].x/10.0) * (ltg[power[j].map].bark-i);
407 bad code [SS] 21-1-93
409 weight
+= pow (10.0, power
[j
].x
/ 10.0) * (double) (j
- cbound
[i
]) / (double) (cbound
[i
+ 1] - cbound
[i
]); /* correction */
413 } /* check to see if the spectral line is low dB, and if */
414 } /* so replace the center of the critical band, which is */
415 /* the center freq. of the noise component */
419 centre
= (cbound
[i
+ 1] + cbound
[i
]) / 2;
422 index
= weight
/ pow (10.0, sum
/ 10.0);
424 cbound
[i
] + (int) (index
* (double) (cbound
[i
+ 1] - cbound
[i
]));
428 (double) (((double) cbound
[i
]) * ((double) (cbound
[i
+ 1] - 1)));
429 centre
= (int) (pow (index
, 0.5) + 0.5);
432 /* locate next non-tonal component until finished; */
433 /* add to list of non-tonal components */
435 /* Masahiro Iwadare's fix for infinite looping problem? */
436 if (power
[centre
].type
== TONE
)
438 if (power
[centre
+ 1].type
== TONE
)
444 /* Mike Li's fix for infinite looping problem */
445 if (power
[centre
].type
== FALSE
)
448 if (power
[centre
].type
== NOISE
)
450 if (power
[centre
].x
>= ltg
[power
[i
].map
].hear
)
452 if (sum
>= ltg
[power
[i
].map
].hear
)
453 sum
= add_db (power
[j
].x
, sum
);
455 sum
= power
[centre
].x
;
463 power
[centre
].next
= LAST
;
464 power
[last
].next
= centre
;
466 power
[centre
].x
= sum
;
467 power
[centre
].type
= NOISE
;
472 /****************************************************************
474 * This function reduces the number of noise and tonal
475 * component for further threshold analysis.
477 ****************************************************************/
480 subsampling (mask power
[HAN_SIZE
], g_thres
* ltg
, int *tone
, int *noise
)
485 old
= STOP
; /* calculate tonal components for */
487 { /* reduction of spectral lines */
488 if (power
[i
].x
< ltg
[power
[i
].map
].hear
)
490 power
[i
].type
= FALSE
;
493 *tone
= power
[i
].next
;
495 power
[old
].next
= power
[i
].next
;
502 old
= STOP
; /* calculate non-tonal components for */
504 { /* reduction of spectral lines */
505 if (power
[i
].x
< ltg
[power
[i
].map
].hear
)
507 power
[i
].type
= FALSE
;
510 *noise
= power
[i
].next
;
512 power
[old
].next
= power
[i
].next
;
521 { /* if more than one */
522 if (power
[i
].next
== LAST
)
523 break; /* tonal component */
524 if (ltg
[power
[power
[i
].next
].map
].bark
- /* is less than .5 */
525 ltg
[power
[i
].map
].bark
< 0.5)
526 { /* bark, take the */
527 if (power
[power
[i
].next
].x
> power
[i
].x
)
530 *tone
= power
[i
].next
;
532 power
[old
].next
= power
[i
].next
;
533 power
[i
].type
= FALSE
;
539 power
[power
[i
].next
].type
= FALSE
;
540 power
[power
[i
].next
].x
= DBMIN
;
541 power
[i
].next
= power
[power
[i
].next
].next
;
553 /****************************************************************
555 * This function calculates the individual threshold and
556 * sum with the quiet threshold to find the global threshold.
558 ****************************************************************/
561 threshold (mask power
[HAN_SIZE
], g_thres
* ltg
, int *tone
, int *noise
,
565 double dz
, tmps
, vf
= 0.0;
567 for (k
= 1; k
< sub_size
; k
++)
570 t
= *tone
; /* calculate individual masking threshold for */
572 { /* components in order to find the global */
573 if (ltg
[k
].bark
- ltg
[power
[t
].map
].bark
>= -3.0 && /*threshold (LTG) */
574 ltg
[k
].bark
- ltg
[power
[t
].map
].bark
< 8.0)
576 dz
= ltg
[k
].bark
- ltg
[power
[t
].map
].bark
; /* distance of bark value */
578 -1.525 - 0.275 * ltg
[power
[t
].map
].bark
- 4.5 + power
[t
].x
;
579 /* masking function for lower & upper slopes */
580 if (-3 <= dz
&& dz
< -1)
581 vf
= 17 * (dz
+ 1) - (0.4 * power
[t
].x
+ 6);
582 else if (-1 <= dz
&& dz
< 0)
583 vf
= (0.4 * power
[t
].x
+ 6) * dz
;
584 else if (0 <= dz
&& dz
< 1)
586 else if (1 <= dz
&& dz
< 8)
587 vf
= -(dz
- 1) * (17 - 0.15 * power
[t
].x
) - 17;
589 ltg
[k
].x
= add_db (ltg
[k
].x
, tmps
);
594 t
= *noise
; /* calculate individual masking threshold */
596 { /* for non-tonal components to find LTG */
597 if (ltg
[k
].bark
- ltg
[power
[t
].map
].bark
>= -3.0 &&
598 ltg
[k
].bark
- ltg
[power
[t
].map
].bark
< 8.0)
600 dz
= ltg
[k
].bark
- ltg
[power
[t
].map
].bark
; /* distance of bark value */
602 -1.525 - 0.175 * ltg
[power
[t
].map
].bark
- 0.5 + power
[t
].x
;
603 /* masking function for lower & upper slopes */
604 if (-3 <= dz
&& dz
< -1)
605 vf
= 17 * (dz
+ 1) - (0.4 * power
[t
].x
+ 6);
606 else if (-1 <= dz
&& dz
< 0)
607 vf
= (0.4 * power
[t
].x
+ 6) * dz
;
608 else if (0 <= dz
&& dz
< 1)
610 else if (1 <= dz
&& dz
< 8)
611 vf
= -(dz
- 1) * (17 - 0.15 * power
[t
].x
) - 17;
613 ltg
[k
].x
= add_db (ltg
[k
].x
, tmps
);
618 ltg
[k
].x
= add_db (ltg
[k
].hear
, ltg
[k
].x
);
620 ltg
[k
].x
= add_db (ltg
[k
].hear
- 12.0, ltg
[k
].x
);
624 /****************************************************************
626 * This function finds the minimum masking threshold and
627 * return the value to the encoder.
629 ****************************************************************/
632 minimum_mask (g_thres
* ltg
, double ltmin
[SBLIMIT
], int sblimit
)
638 for (i
= 0; i
< sblimit
; i
++)
639 if (j
>= sub_size
- 1) /* check subband limit, and */
640 ltmin
[i
] = ltg
[sub_size
- 1].hear
; /* calculate the minimum masking */
642 { /* level of LTMIN for each subband */
644 while (ltg
[j
].line
>> 4 == i
&& j
< sub_size
)
654 /*****************************************************************
656 * This procedure is called in musicin to pick out the
657 * smaller of the scalefactor or threshold.
659 *****************************************************************/
662 smr (double ltmin
[SBLIMIT
], double spike
[SBLIMIT
],
663 double scale
[SBLIMIT
], int sblimit
)
668 for (i
= 0; i
< sblimit
; i
++)
669 { /* determine the signal */
670 max
= 20 * log10 (scale
[i
] * 32768) - 10; /* level for each subband */
672 max
= spike
[i
]; /* for the maximum scale */
673 max
-= ltmin
[i
]; /* factors */
678 /****************************************************************
680 * This procedure calls all the necessary functions to
681 * complete the psychoacoustic analysis.
683 ****************************************************************/
686 psycho_one (short buffer
[2][1152], double scale
[2][SBLIMIT
],
687 double ltmin
[2][SBLIMIT
], frame_params
* fr_ps
)
689 layer
*info
= fr_ps
->header
;
690 int stereo
= fr_ps
->stereo
;
691 int sblimit
= fr_ps
->sblimit
;
692 int k
, i
, tone
= 0, noise
= 0;
693 static char init
= 0;
694 static int off
[2] = { 256, 256 };
697 static D1408
*fft_buf
;
698 static mask_ptr power
;
701 sample
= (double *) mem_alloc (sizeof (DFFT
), "sample");
702 spike
= (DSBL
*) mem_alloc (sizeof (D2SBL
), "spike");
703 /* call functions for critical boundaries, freq. */
705 { /* bands, bark values, and mapping */
706 fft_buf
= (D1408
*) mem_alloc ((long) sizeof (D1408
) * 2, "fft_buf");
707 power
= (mask_ptr
) mem_alloc (sizeof (mask
) * HAN_SIZE
, "power");
708 if (info
->version
== MPEG_AUDIO_ID
)
710 read_cbound (info
->sampling_frequency
);
711 read_freq_band (<g
, info
->sampling_frequency
);
715 read_cbound (info
->sampling_frequency
+ 4);
716 read_freq_band (<g
, info
->sampling_frequency
+ 4);
718 make_map (power
, ltg
);
719 for (i
= 0; i
< 1408; i
++)
720 fft_buf
[0][i
] = fft_buf
[1][i
] = 0;
723 for (k
= 0; k
< stereo
; k
++)
724 { /* check pcm input for 3 blocks of 384 samples */
725 for (i
= 0; i
< 1152; i
++)
726 fft_buf
[k
][(i
+ off
[k
]) % 1408] = (double) buffer
[k
][i
] / SCALE
;
727 for (i
= 0; i
< FFT_SIZE
; i
++)
728 sample
[i
] = fft_buf
[k
][(i
+ 1216 + off
[k
]) % 1408];
731 /* call functions for windowing PCM samples, */
732 hann_win (sample
); /* location of spectral components in each */
733 for (i
= 0; i
< HAN_SIZE
; i
++)
734 power
[i
].x
= DBMIN
; /*subband with labeling */
735 f_f_t (sample
, power
); /*locate remaining non- */
736 pick_max (power
, &spike
[k
][0]); /*tonal sinusoidals, */
737 tonal_label (power
, &tone
); /*reduce noise & tonal */
738 noise_label (power
, &noise
, ltg
); /*components, find */
739 subsampling (power
, ltg
, &tone
, &noise
); /*global & minimal */
740 threshold (power
, ltg
, &tone
, &noise
, /*threshold, and sgnl- */
741 bitrate
[info
->version
][info
->bitrate_index
] / stereo
); /*to-mask ratio */
742 minimum_mask (ltg
, <min
[k
][0], sblimit
);
743 smr (<min
[k
][0], &spike
[k
][0], &scale
[k
][0], sblimit
);
745 mem_free ((void **) &sample
);
746 mem_free ((void **) &spike
);