Merge branch 'ct' of git.pipapo.org:cinelerra-ct into ct
[cinelerra_cv/ct.git] / toolame-02l / psycho_3.c
blobddbcbfadc2fb8df59dc802d7d149c0ba20c7d0b5
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 #include "common.h"
6 #include "options.h"
7 #include "encoder.h"
8 #include "mem.h"
9 #include "fft.h"
10 #include "ath.h"
11 #define OLDTHRESHx
12 #include "psycho_3.h"
13 #include "psycho_3priv.h"
15 /* This is a reimplementation of psy model 1 using the ISO11172 standard.
16 I found the original dist10 code (which is full of pointers) to be
17 a horrible thing to try and understand and debug.
18 This implementation is not built for speed, but is rather meant to
19 clearly outline the steps specified by the standard (still, it's only
20 a tiny fraction slower than the dist10 code, and nothing has been optimized)
21 MFC Feb 2003 */
23 /* Keep a table to fudge the adding of dB */
24 #define DBTAB 1000
25 static double dbtable[DBTAB];
27 #define CRITBANDMAX 32 /* this is much higher than it needs to be. really only about 24 */
28 int cbands=0; /* How many critical bands there really are */
29 int cbandindex[CRITBANDMAX]; /* The spectral line index of the start of
30 each critical band */
32 #define SUBSIZE 136
33 int freq_subset[SUBSIZE];
34 FLOAT bark[HBLKSIZE], ath[HBLKSIZE];
36 int *numlines;
37 FLOAT *cbval;
38 int partition[HBLKSIZE];
39 static D1408 *fft_buf;
41 frame_header *header;
44 INLINE double psycho_3_add_db (double a, double b)
46 /* MFC - if the difference between a and b is large (>99), then just return the
47 largest one. (about 10% of the time)
48 - For differences between 0 and 99, return the largest value, but add
49 in a pre-calculated difference value.
50 - the value 99 was chosen arbitarily.
51 - maximum (a-b) i've seen is 572 */
52 FLOAT fdiff;
53 int idiff;
54 fdiff = (10.0 * (a - b));
56 if (fdiff > 990.0) {
57 return a;
59 if (fdiff < -990.0) {
60 return (b);
63 idiff = (int) fdiff;
64 if (idiff >= 0) {
65 return (a + dbtable[idiff]);
68 return (b + dbtable[-idiff]);
71 void psycho_3 (short buffer[2][1152], double scale[2][SBLIMIT],
72 double ltmin[2][SBLIMIT], frame_info * frame, options *glopts)
74 int nch = frame->nch;
75 int sblimit = frame->sblimit;
76 int k, i;
77 static char init = 0;
78 static int off[2] = { 256, 256 };
79 FLOAT sample[BLKSIZE];
81 FLOAT energy[BLKSIZE];
82 FLOAT power[HBLKSIZE];
83 FLOAT Xtm[HBLKSIZE], Xnm[HBLKSIZE];
84 int tonelabel[HBLKSIZE], noiselabel[HBLKSIZE];
85 FLOAT LTg[HBLKSIZE];
86 double Lsb[SBLIMIT];
88 header = frame->header;
90 if (init==0) {
91 psycho_3_init(glopts);
92 init++;
96 for (k = 0; k < nch; k++) {
97 int ok = off[k] % 1408;
98 for (i = 0; i < 1152; i++) {
99 fft_buf[k][ok++] = (FLOAT) buffer[k][i] / SCALE;
100 if (ok >= 1408)
101 ok = 0;
103 ok = (off[k] + 1216) % 1408;
104 for (i = 0; i < BLKSIZE; i++) {
105 sample[i] = fft_buf[k][ok++];
106 if (ok >= 1408)
107 ok = 0;
110 off[k] += 1152;
111 off[k] %= 1408;
113 psycho_3_fft(sample, energy);
114 psycho_3_powerdensityspectrum(energy, power);
115 psycho_3_spl(Lsb, power, &scale[k][0]);
116 psycho_3_tonal_label (power, tonelabel, Xtm);
117 psycho_3_noise_label (power, energy, tonelabel, noiselabel, Xnm);
118 if (glopts->verbosity > 20)
119 psycho_3_dump(tonelabel, Xtm, noiselabel, Xnm);
120 psycho_3_decimation(ath, tonelabel, Xtm, noiselabel, Xnm, bark);
121 psycho_3_threshold(LTg, tonelabel, Xtm, noiselabel, Xnm, bark, ath, bitrate[header->version][header->bitrate_index] / nch, freq_subset);
122 psycho_3_minimummasking(LTg, &ltmin[k][0], freq_subset);
123 psycho_3_smr(&ltmin[k][0], Lsb);
127 /* ISO11172 Sec D.1 Step 1 - Window with HANN and then perform the FFT */
128 void psycho_3_fft(FLOAT sample[BLKSIZE], FLOAT energy[BLKSIZE])
130 FLOAT x_real[BLKSIZE];
131 int i;
132 static int init = 0;
133 static FLOAT *window;
135 if (!init) { /* calculate window function for the Fourier transform */
136 window = (FLOAT *) mem_alloc (sizeof (DFFT), "window");
137 register FLOAT sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
138 for (i = 0; i < BLKSIZE; i++) {
139 window[i] = sqrt_8_over_3 * 0.5 * (1 - cos (2.0 * PI * i / (BLKSIZE))) / BLKSIZE;
141 init++;
144 /* convolve the samples with the hann window */
145 for (i = 0; i < BLKSIZE; i++)
146 x_real[i] = (FLOAT) (sample[i] * window[i]);
147 /* do the FFT */
148 psycho_1_fft (x_real, energy, BLKSIZE);
151 /* Sect D.1 Step 1 - convert the energies into dB */
152 void psycho_3_powerdensityspectrum(FLOAT energy[BLKSIZE], FLOAT power[HBLKSIZE]) {
153 int i;
154 for (i=1;i<HBLKSIZE;i++) {
155 if (energy[i] < 1E-20)
156 power[i] = -200.0 + POWERNORM;
157 else
158 power[i] = 10 * log10 (energy[i]) + POWERNORM;
162 /* Sect D.1 Step 2 - Determine the sound pressure level in each subband */
163 void psycho_3_spl(double *Lsb, FLOAT *power, double *scale) {
164 int i;
165 FLOAT Xmax[SBLIMIT];
167 for (i=0;i<SBLIMIT;i++) {
168 Xmax[i] = DBMIN;
170 /* Find the maximum SPL in the power spectrum */
171 for (i=1;i<HBLKSIZE;i++) {
172 int index = i>>4;
173 if (Xmax[index] < power[i])
174 Xmax[index] = power[i];
177 /* Compare it to the sound pressure based upon the scale for this subband
178 and pick the maximum one */
179 for (i=0;i<SBLIMIT;i++) {
180 double val = 20 * log10 (scale[i] * 32768) - 10;
181 Lsb[i] = MAX(Xmax[i], val);
185 /* Sect D.1 Step 4 Label the Tonal Components */
186 void psycho_3_tonal_label (FLOAT power[HBLKSIZE], int *tonelabel, FLOAT Xtm[HBLKSIZE])
188 int i;
189 int maxima[HBLKSIZE];
191 /* Find the maxima as per ISO11172 D.1.4.a */
192 maxima[0]=maxima[HBLKSIZE-1]=0;
193 tonelabel[0]=tonelabel[HBLKSIZE-1]=0;
194 Xtm[0] = Xtm[HBLKSIZE-1] = DBMIN;
195 for (i=1;i<HBLKSIZE-1;i++) {
196 tonelabel[i] = 0;
197 Xtm[i] = DBMIN;
198 if (power[i]>power[i-1] && power[i]>power[i+1]) /* The first criteria for a maximum */
199 maxima[i]=1;
200 else
201 maxima[i]=0;
205 /* Now find the tones as per ISO11172 D.1 Step4.b */
206 /* The standard is a bit vague (surprise surprise).
207 So I'm going to assume that
208 - a tone must be 7dB greater than *all* the relevant neighbours
209 - once a tone is found, the neighbours are immediately set to -inf dB
212 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 2, 63, 2);
213 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 63,127,3);
214 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 127,255,6);
215 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 255,500,12);
220 /* Sect D.1 Step4b
221 A tone within the range (start -> end), must be 7.0 dB greater than
222 all it's neighbours within +/- srange. Don't count its immediate neighbours. */
223 void psycho_3_tonal_label_range(FLOAT *power, int *tonelabel, int *maxima, FLOAT *Xtm, int start, int end, int srange) {
224 int j,k;
226 for (k=start;k<end;k++) /* Search for all the maxima in this range */
227 if (maxima[k] == 1) {
228 tonelabel[k] = TONE; /* assume it's a TONE and then prove otherwise */
229 for (j=-srange;j<=+srange;j++) /* Check the neighbours within +/- srange */
230 if (abs(j) > 1) /* Don't count the immediate neighbours, or itself */
231 if ((power[k] - power[k+j]) < 7.0)
232 tonelabel[k] = 0; /* Not greater by 7dB, therefore not a tone */
233 if (tonelabel[k] == TONE) {
234 /* Calculate the sound pressure level for this tone by summing
235 the adjacent spectral lines
236 Xtm[k] = 10 * log10( pow(10.0, 0.1*power[k-1]) + pow(10.0, 0.1*power[k])
237 + pow(10.0, 0.1*power[k+1]) ); */
238 double temp = psycho_3_add_db(power[k-1], power[k]);
239 Xtm[k] = psycho_3_add_db(temp, power[k+1]);
241 /* *ALL* spectral lines within +/- srange are set to -inf dB
242 So that when we do the noise calculate, they are not counted */
243 for (j=-srange;j<=+srange;j++)
244 power[k+j] = DBMIN;
249 void psycho_3_init_add_db (void)
251 int i;
252 double x;
253 for (i = 0; i < DBTAB; i++) {
254 x = (double) i / 10.0;
255 dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x;
259 /* D.1 Step 4.c Labelling non-tonal (noise) components
260 Sum the energies in each critical band (the tone energies have been removed
261 during the tone labelling).
262 Find the "geometric mean" of these energies - i.e. find the best spot to put the
263 sum of energies within this critical band. */
264 void psycho_3_noise_label (FLOAT power[HBLKSIZE], FLOAT energy[BLKSIZE], int *tonelabel, int *noiselabel, FLOAT Xnm[HBLKSIZE]) {
265 int i,j;
267 Xnm[0] = DBMIN;
268 for (i=0;i<cbands;i++) {
269 /* for each critical band */
270 double sum = DBMIN;
271 double esum=0;
272 double centreweight = 0;
273 int centre;
274 for (j=cbandindex[i]; j<cbandindex[i+1]; j++) {
275 Xnm[j] = DBMIN;
276 /* go through all the spectral lines within the critical band,
277 adding the energies. The tone energies have already been removed */
278 if (power[j] != DBMIN) {
279 /* Found a noise energy, add it to the sum */
280 sum = psycho_3_add_db(power[j], sum);
282 /* calculations for the geometric mean
283 FIXME MFC Feb 2003: Would it just be easier to
284 do the *whole* of psycho_1 in the energy domain rather than
285 in the dB domain?
286 FIXME: This is just a lazy arsed arithmetic mean. Don't know
287 if it's really going to make that much difference */
288 esum += energy[j]; /* Calculate the sum of energies */
289 centreweight += (j - cbandindex[i]) * energy[j]; /* And the energy moment */
293 if (sum<=DBMIN)
294 /* If the energy sum is really small, just pretend the noise occurs
295 in the centre frequency line */
296 centre = (cbandindex[i] + cbandindex[i+1])/2;
297 else
298 /* Otherwise, work out the mean position of the noise, and put it there. */
299 centre = cbandindex[i] + (int)(centreweight/esum);
301 Xnm[centre] = sum;
302 noiselabel[centre] = NOISE;
306 /* ISO11172 D.1 Step 5
307 Get rid of noise/tones that aren't greater than the ATH
308 If two tones are within 0.5bark, then delete the tone with the lower energy */
309 void psycho_3_decimation(FLOAT *ath, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark) {
310 int i;
312 /* Delete components which aren't above the ATH */
313 for (i=1;i<HBLKSIZE;i++) {
314 if (noiselabel[i]==NOISE) {
315 if (Xnm[i] < ath[i]) {
316 /* this masker isn't above the ATH : delete it */
317 Xnm[i] = DBMIN;
318 noiselabel[i]=0;
321 if (tonelabel[i] == TONE) {
322 if (Xtm[i] < ath[i]) {
323 Xtm[i] = DBMIN;
324 tonelabel[i]=0;
328 /* Search for tones that are within 0.5 bark */
329 /* MFC FIXME Feb 2003: haven't done this yet */
333 /* ISO11172 Sect D.1 Step 6
334 Calculation of individual masking thresholds
335 Work out how each of the tones&noises maskes other frequencies
336 NOTE: Only a subset of other frequencies is checked. According to the
337 standard different subbands are subsampled to different amounts.
338 See psycho_3_init and freq_subset */
339 void psycho_3_threshold(FLOAT *LTg, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark, FLOAT *ath, int bit_rate, int *freq_subset) {
340 int i,j,k;
341 FLOAT LTtm[SUBSIZE];
342 FLOAT LTnm[SUBSIZE];
344 for (i=0;i<SUBSIZE;i++) {
345 LTtm[i] = DBMIN;
346 LTnm[i] = DBMIN;
348 /* Loop over the entire spectrum and find every noise and tone
349 And then with each noise/tone work out how it masks
350 the spectral lines around it */
351 for (k=1;k<HBLKSIZE;k++) {
352 /* Find every tone */
353 if (tonelabel[k]==TONE) {
354 for (j=0;j<SUBSIZE;j++) {
355 /* figure out how it masks the levels around it */
356 FLOAT dz = bark[freq_subset[j]] - bark[k];
357 if (dz >= -3.0 && dz < 8.0) {
358 FLOAT vf;
359 FLOAT av = -1.525 - 0.275 * bark[k] - 4.5 + Xtm[k];
360 /* masking function for lower & upper slopes */
361 if (dz < -1)
362 vf = 17 * (dz + 1) - (0.4 * Xtm[k] + 6);
363 else if (dz < 0)
364 vf = (0.4 * Xtm[k] + 6) * dz;
365 else if (dz < 1)
366 vf = (-17 * dz);
367 else
368 vf = -(dz - 1) * (17 - 0.15 * Xtm[k]) - 17;
369 LTtm[j] = psycho_3_add_db (LTtm[j], av + vf);
374 /* find every noise label */
375 if (noiselabel[k]==NOISE) {
376 for (j=0;j<SUBSIZE;j++) {
377 /* figure out how it masks the levels around it */
378 FLOAT dz = bark[freq_subset[j]] - bark[k];
379 if (dz >= -3.0 && dz < 8.0) {
380 FLOAT vf;
381 FLOAT av = -1.525 - 0.175 * bark[k] - 0.5 + Xnm[k];
382 /* masking function for lower & upper slopes */
383 if (dz < -1)
384 vf = 17 * (dz + 1) - (0.4 * Xnm[k] + 6);
385 else if (dz < 0)
386 vf = (0.4 * Xnm[k] + 6) * dz;
387 else if (dz < 1)
388 vf = (-17 * dz);
389 else
390 vf = -(dz - 1) * (17 - 0.15 * Xnm[k]) - 17;
391 LTnm[j] = psycho_3_add_db (LTnm[j], av + vf);
397 /* ISO11172 D.1 Step 7
398 Calculate the global masking threhold */
399 for (i=0;i<SUBSIZE;i++) {
400 LTg[i] = psycho_3_add_db(LTnm[i], LTtm[i]);
401 if (bit_rate < 96)
402 LTg[i] = psycho_3_add_db(ath[freq_subset[i]], LTg[i]);
403 else
404 LTg[i] = psycho_3_add_db(ath[freq_subset[i]]-12.0, LTg[i]);
408 /* Find the minimum LTg for each subband. ISO11172 Sec D.1 Step 8 */
409 void psycho_3_minimummasking(FLOAT *LTg, double *LTmin, int *freq_subset) {
410 int i;
412 for (i=0;i<SBLIMIT;i++)
413 LTmin[i] = 999999.9;
415 for (i=0;i<SUBSIZE;i++) {
416 int index = freq_subset[i]>>4;
417 if (LTmin[index] > LTg[i]) {
418 LTmin[index] = LTg[i];
423 /* ISO11172 Sect D.1 Step 9
424 Calculate the signal-to-mask ratio
425 MFC FIXME Feb 2003 for better calling from
426 toolame, add a "float SMR[]" array and return it */
427 void psycho_3_smr(double *LTmin, double *Lsb) {
428 int i;
429 for (i=0;i<SBLIMIT;i++) {
430 LTmin[i] = Lsb[i] - LTmin[i];
434 void psycho_3_init(options *glopts) {
435 int i;
436 int cbase = 0; /* current base index for the bark range calculation */
438 fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf");
440 /* Initialise the tables for the adding dB */
441 psycho_3_init_add_db();
443 /* For each spectral line calculate the bark and the ATH (in dB) */
444 FLOAT sfreq = (FLOAT) s_freq[header->version][header->sampling_frequency] * 1000;
445 for (i=1;i<HBLKSIZE; i++) {
446 FLOAT freq = i * sfreq/BLKSIZE;
447 bark[i] = toolame_freq2bark(freq);
448 ath[i] = ATH_dB(freq,glopts->athlevel);
451 { /* Work out the critical bands
452 Starting from line 0, all lines within 1 bark of the starting
453 bark are added to the same critical band. When a line is greater
454 by 1.0 of a bark, start a new critical band. */
456 numlines = (int *)calloc(HBLKSIZE, sizeof(int));
457 cbval = (float *)calloc(HBLKSIZE, sizeof(float));
458 cbandindex[0] = 1;
459 for (i=1;i<HBLKSIZE;i++) {
460 if ((bark[i] - bark[cbase]) > 1.0) { /* 1 critical band? 1 bark? */
461 /* this frequency line is too different from the starting line,
462 (in terms of the bark distance)
463 so make this spectral line the first member of the next critical band */
464 cbase = i; /* Start the new critical band from this frequency line */
465 cbands++;
466 cbandindex[cbands] = cbase;
468 /* partition[i] tells us which critical band the i'th frequency line is in */
469 partition[i] = cbands;
470 /* keep a count of how many frequency lines are in each partition */
471 numlines[cbands]++;
474 cbands++;
475 cbandindex[cbands] = 513; /* Set the top of the last critical band */
477 /* For each crtical band calculate the average bark value
478 cbval [central bark value] */
479 for (i=1;i<HBLKSIZE;i++)
480 cbval[partition[i]] += bark[i]; /* sum up all the bark values */
481 for (i=1;i<CBANDS;i++) {
482 if (numlines[i] != 0)
483 cbval[i] /= numlines[i]; /* divide by the number of values */
484 else {
485 cbval[i]=0; /* this isn't a partition */
491 /* For Step6 - For the calculation of individual masking thresholds
492 the spectral lines are subsampled
493 i.e. no need to work out the masking for every single spectral line.
494 Depending upon which subband the calculation is for, you
495 can skip a number of lines
496 There are 16 lines per subband -> 32 * 16 = 512
497 Subband 0-2 : Every line (3 * 16 = 48 lines)
498 Subband 3-5 : Every Second line (3 * 16/2 = 24 lines)
499 Subband 6-11 : Every 4th line (6 * 16/4 = 24 lines)
500 Subband 12-31 : Every 12th line (20 * 16/8 = 40 lines)
502 create this subset of frequencies (freq_subset) */
503 int freq_index=0;
504 for (i=1;i<(3*16)+1;i++)
505 freq_subset[freq_index++] = i;
506 for (;i<(6*16)+1;i+=2)
507 freq_subset[freq_index++] = i;
508 for (;i<(12*16)+1;i+=4)
509 freq_subset[freq_index++] = i;
510 for (;i<(32*16)+1;i+=8)
511 freq_subset[freq_index++] = i;
514 if (glopts->verbosity > 4) {
515 fprintf(stdout,"%i critical bands\n",cbands);
516 for (i=0;i<cbands;i++)
517 fprintf(stdout,"cband %i spectral line index %i\n",i,cbandindex[i]);
518 fprintf(stdout,"%i Subsampled spectral lines\n",SUBSIZE);
519 for (i=0;i<SUBSIZE;i++)
520 fprintf(stdout,"%i Spectral line %i Bark %.2f\n",i,freq_subset[i], bark[freq_subset[i]]);
524 void psycho_3_dump(int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm) {
525 int i;
526 fprintf(stdout,"3 Ton:");
527 for (i=1;i<HAN_SIZE;i++) {
528 if (tonelabel[i] == TONE)
529 fprintf(stdout,"[%i] %3.0f ",i,Xtm[i]);
531 fprintf(stdout,"\n");
533 fprintf(stdout,"3 Nos:");
534 for (i=1;i<HAN_SIZE;i++) {
535 if (noiselabel[i] == NOISE)
536 fprintf(stdout,"[%i] %3.0f ",i,Xnm[i]);
538 fprintf(stdout,"\n");