3 * Copyright (c) 2009 Vitor Sessak
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/channel_layout.h"
23 #include "libavutil/float_dsp.h"
34 #include "twinvq_data.h"
37 FT_SHORT
= 0, ///< Short frame (divided in n sub-blocks)
38 FT_MEDIUM
, ///< Medium frame (divided in m<n sub-blocks)
39 FT_LONG
, ///< Long frame (single sub-block + PPC)
40 FT_PPC
, ///< Periodic Peak Component (part of the long frame)
44 * Parameters and tables that are different for each frame type
47 uint8_t sub
; ///< Number subblocks in each frame
48 const uint16_t *bark_tab
;
50 /** number of distinct bark scale envelope values */
51 uint8_t bark_env_size
;
53 const int16_t *bark_cb
; ///< codebook for the bark scale envelope (BSE)
54 uint8_t bark_n_coef
;///< number of BSE CB coefficients to read
55 uint8_t bark_n_bit
; ///< number of bits of the BSE coefs
58 /** main codebooks for spectrum data */
63 uint8_t cb_len_read
; ///< number of spectrum coefficients to read
67 * Parameters and tables that are different for every combination of
71 struct FrameMode fmode
[3]; ///< frame type-dependant parameters
73 uint16_t size
; ///< frame size in samples
74 uint8_t n_lsp
; ///< number of lsp coefficients
75 const float *lspcodebook
;
77 /* number of bits of the different LSP CB coefficients */
82 uint8_t lsp_split
; ///< number of CB entries for the LSP decoding
83 const int16_t *ppc_shape_cb
; ///< PPC shape CB
85 /** number of the bits for the PPC period value */
86 uint8_t ppc_period_bit
;
88 uint8_t ppc_shape_bit
; ///< number of bits of the PPC shape CB coeffs
89 uint8_t ppc_shape_len
; ///< size of PPC shape CB
90 uint8_t pgain_bit
; ///< bits for PPC gain
92 /** constant for peak period to peak width conversion */
93 uint16_t peak_per2wid
;
96 static const ModeTab mode_08_08
= {
98 { 8, bark_tab_s08_64
, 10, tab
.fcb08s
, 1, 5, tab
.cb0808s0
, tab
.cb0808s1
, 18},
99 { 2, bark_tab_m08_256
, 20, tab
.fcb08m
, 2, 5, tab
.cb0808m0
, tab
.cb0808m1
, 16},
100 { 1, bark_tab_l08_512
, 30, tab
.fcb08l
, 3, 6, tab
.cb0808l0
, tab
.cb0808l1
, 17}
102 512 , 12, tab
.lsp08
, 1, 5, 3, 3, tab
.shape08
, 8, 28, 20, 6, 40
105 static const ModeTab mode_11_08
= {
107 { 8, bark_tab_s11_64
, 10, tab
.fcb11s
, 1, 5, tab
.cb1108s0
, tab
.cb1108s1
, 29},
108 { 2, bark_tab_m11_256
, 20, tab
.fcb11m
, 2, 5, tab
.cb1108m0
, tab
.cb1108m1
, 24},
109 { 1, bark_tab_l11_512
, 30, tab
.fcb11l
, 3, 6, tab
.cb1108l0
, tab
.cb1108l1
, 27}
111 512 , 16, tab
.lsp11
, 1, 6, 4, 3, tab
.shape11
, 9, 36, 30, 7, 90
114 static const ModeTab mode_11_10
= {
116 { 8, bark_tab_s11_64
, 10, tab
.fcb11s
, 1, 5, tab
.cb1110s0
, tab
.cb1110s1
, 21},
117 { 2, bark_tab_m11_256
, 20, tab
.fcb11m
, 2, 5, tab
.cb1110m0
, tab
.cb1110m1
, 18},
118 { 1, bark_tab_l11_512
, 30, tab
.fcb11l
, 3, 6, tab
.cb1110l0
, tab
.cb1110l1
, 20}
120 512 , 16, tab
.lsp11
, 1, 6, 4, 3, tab
.shape11
, 9, 36, 30, 7, 90
123 static const ModeTab mode_16_16
= {
125 { 8, bark_tab_s16_128
, 10, tab
.fcb16s
, 1, 5, tab
.cb1616s0
, tab
.cb1616s1
, 16},
126 { 2, bark_tab_m16_512
, 20, tab
.fcb16m
, 2, 5, tab
.cb1616m0
, tab
.cb1616m1
, 15},
127 { 1, bark_tab_l16_1024
,30, tab
.fcb16l
, 3, 6, tab
.cb1616l0
, tab
.cb1616l1
, 16}
129 1024, 16, tab
.lsp16
, 1, 6, 4, 3, tab
.shape16
, 9, 56, 60, 7, 180
132 static const ModeTab mode_22_20
= {
134 { 8, bark_tab_s22_128
, 10, tab
.fcb22s_1
, 1, 6, tab
.cb2220s0
, tab
.cb2220s1
, 18},
135 { 2, bark_tab_m22_512
, 20, tab
.fcb22m_1
, 2, 6, tab
.cb2220m0
, tab
.cb2220m1
, 17},
136 { 1, bark_tab_l22_1024
,32, tab
.fcb22l_1
, 4, 6, tab
.cb2220l0
, tab
.cb2220l1
, 18}
138 1024, 16, tab
.lsp22_1
, 1, 6, 4, 3, tab
.shape22_1
, 9, 56, 36, 7, 144
141 static const ModeTab mode_22_24
= {
143 { 8, bark_tab_s22_128
, 10, tab
.fcb22s_1
, 1, 6, tab
.cb2224s0
, tab
.cb2224s1
, 15},
144 { 2, bark_tab_m22_512
, 20, tab
.fcb22m_1
, 2, 6, tab
.cb2224m0
, tab
.cb2224m1
, 14},
145 { 1, bark_tab_l22_1024
,32, tab
.fcb22l_1
, 4, 6, tab
.cb2224l0
, tab
.cb2224l1
, 15}
147 1024, 16, tab
.lsp22_1
, 1, 6, 4, 3, tab
.shape22_1
, 9, 56, 36, 7, 144
150 static const ModeTab mode_22_32
= {
152 { 4, bark_tab_s22_128
, 10, tab
.fcb22s_2
, 1, 6, tab
.cb2232s0
, tab
.cb2232s1
, 11},
153 { 2, bark_tab_m22_256
, 20, tab
.fcb22m_2
, 2, 6, tab
.cb2232m0
, tab
.cb2232m1
, 11},
154 { 1, bark_tab_l22_512
, 32, tab
.fcb22l_2
, 4, 6, tab
.cb2232l0
, tab
.cb2232l1
, 12}
156 512 , 16, tab
.lsp22_2
, 1, 6, 4, 4, tab
.shape22_2
, 9, 56, 36, 7, 72
159 static const ModeTab mode_44_40
= {
161 {16, bark_tab_s44_128
, 10, tab
.fcb44s
, 1, 6, tab
.cb4440s0
, tab
.cb4440s1
, 18},
162 { 4, bark_tab_m44_512
, 20, tab
.fcb44m
, 2, 6, tab
.cb4440m0
, tab
.cb4440m1
, 17},
163 { 1, bark_tab_l44_2048
,40, tab
.fcb44l
, 4, 6, tab
.cb4440l0
, tab
.cb4440l1
, 17}
165 2048, 20, tab
.lsp44
, 1, 6, 4, 4, tab
.shape44
, 9, 84, 54, 7, 432
168 static const ModeTab mode_44_48
= {
170 {16, bark_tab_s44_128
, 10, tab
.fcb44s
, 1, 6, tab
.cb4448s0
, tab
.cb4448s1
, 15},
171 { 4, bark_tab_m44_512
, 20, tab
.fcb44m
, 2, 6, tab
.cb4448m0
, tab
.cb4448m1
, 14},
172 { 1, bark_tab_l44_2048
,40, tab
.fcb44l
, 4, 6, tab
.cb4448l0
, tab
.cb4448l1
, 14}
174 2048, 20, tab
.lsp44
, 1, 6, 4, 4, tab
.shape44
, 9, 84, 54, 7, 432
177 typedef struct TwinContext
{
178 AVCodecContext
*avctx
;
179 AVFloatDSPContext fdsp
;
180 FFTContext mdct_ctx
[3];
185 float lsp_hist
[2][20]; ///< LSP coefficients of the last frame
186 float bark_hist
[3][2][40]; ///< BSE coefficients of last frame
188 // bitstream parameters
189 int16_t permut
[4][4096];
190 uint8_t length
[4][2]; ///< main codebook stride
191 uint8_t length_change
[4];
192 uint8_t bits_main_spec
[2][4][2]; ///< bits for the main codebook
193 int bits_main_spec_change
[4];
197 float *curr_frame
; ///< non-interleaved output
198 float *prev_frame
; ///< non-interleaved previous frame
199 int last_block_pos
[2];
200 int discarded_packets
;
208 #define PPC_SHAPE_CB_SIZE 64
209 #define PPC_SHAPE_LEN_MAX 60
210 #define SUB_AMP_MAX 4500.0
211 #define MULAW_MU 100.0
213 #define AMP_MAX 13000.0
214 #define SUB_GAIN_BITS 5
215 #define WINDOW_TYPE_BITS 4
217 #define LSP_COEFS_MAX 20
218 #define LSP_SPLIT_MAX 4
219 #define CHANNELS_MAX 2
220 #define SUBBLOCKS_MAX 16
221 #define BARK_N_COEF_MAX 4
223 /** @note not speed critical, hence not optimized */
224 static void memset_float(float *buf
, float val
, int size
)
231 * Evaluate a single LPC amplitude spectrum envelope coefficient from the line
234 * @param lsp a vector of the cosines of the LSP values
235 * @param cos_val cos(PI*i/N) where i is the index of the LPC amplitude
236 * @param order the order of the LSP (and the size of the *lsp buffer). Must
237 * be a multiple of four.
238 * @return the LPC value
240 * @todo reuse code from Vorbis decoder: vorbis_floor0_decode
242 static float eval_lpc_spectrum(const float *lsp
, float cos_val
, int order
)
247 float two_cos_w
= 2.0f
*cos_val
;
249 for (j
= 0; j
+ 1 < order
; j
+= 2*2) {
250 // Unroll the loop once since order is a multiple of four
251 q
*= lsp
[j
] - two_cos_w
;
252 p
*= lsp
[j
+1] - two_cos_w
;
254 q
*= lsp
[j
+2] - two_cos_w
;
255 p
*= lsp
[j
+3] - two_cos_w
;
258 p
*= p
* (2.0f
- two_cos_w
);
259 q
*= q
* (2.0f
+ two_cos_w
);
261 return 0.5 / (p
+ q
);
265 * Evaluate the LPC amplitude spectrum envelope from the line spectrum pairs.
267 static void eval_lpcenv(TwinContext
*tctx
, const float *cos_vals
, float *lpc
)
270 const ModeTab
*mtab
= tctx
->mtab
;
271 int size_s
= mtab
->size
/ mtab
->fmode
[FT_SHORT
].sub
;
273 for (i
= 0; i
< size_s
/2; i
++) {
274 float cos_i
= tctx
->cos_tabs
[0][i
];
275 lpc
[i
] = eval_lpc_spectrum(cos_vals
, cos_i
, mtab
->n_lsp
);
276 lpc
[size_s
-i
-1] = eval_lpc_spectrum(cos_vals
, -cos_i
, mtab
->n_lsp
);
280 static void interpolate(float *out
, float v1
, float v2
, int size
)
283 float step
= (v1
- v2
)/(size
+ 1);
285 for (i
= 0; i
< size
; i
++) {
291 static inline float get_cos(int idx
, int part
, const float *cos_tab
, int size
)
293 return part
? -cos_tab
[size
- idx
- 1] :
298 * Evaluate the LPC amplitude spectrum envelope from the line spectrum pairs.
299 * Probably for speed reasons, the coefficients are evaluated as
300 * siiiibiiiisiiiibiiiisiiiibiiiisiiiibiiiis ...
301 * where s is an evaluated value, i is a value interpolated from the others
302 * and b might be either calculated or interpolated, depending on an
303 * unexplained condition.
305 * @param step the size of a block "siiiibiiii"
306 * @param in the cosines of the LSP data
307 * @param part is 0 for 0...PI (positive cosine values) and 1 for PI...2PI
308 (negative cosine values)
309 * @param size the size of the whole output
311 static inline void eval_lpcenv_or_interp(TwinContext
*tctx
,
312 enum FrameType ftype
,
313 float *out
, const float *in
,
314 int size
, int step
, int part
)
317 const ModeTab
*mtab
= tctx
->mtab
;
318 const float *cos_tab
= tctx
->cos_tabs
[ftype
];
321 for (i
= 0; i
< size
; i
+= step
)
323 eval_lpc_spectrum(in
,
324 get_cos(i
, part
, cos_tab
, size
),
327 // Fill the 'iiiibiiii'
328 for (i
= step
; i
<= size
- 2*step
; i
+= step
) {
329 if (out
[i
+ step
] + out
[i
- step
] > 1.95*out
[i
] ||
330 out
[i
+ step
] >= out
[i
- step
]) {
331 interpolate(out
+ i
- step
+ 1, out
[i
], out
[i
-step
], step
- 1);
334 eval_lpc_spectrum(in
,
335 get_cos(i
-step
/2, part
, cos_tab
, size
),
337 interpolate(out
+ i
- step
+ 1, out
[i
-step
/2], out
[i
-step
], step
/2 - 1);
338 interpolate(out
+ i
- step
/2 + 1, out
[i
], out
[i
-step
/2], step
/2 - 1);
342 interpolate(out
+ size
- 2*step
+ 1, out
[size
-step
], out
[size
- 2*step
], step
- 1);
345 static void eval_lpcenv_2parts(TwinContext
*tctx
, enum FrameType ftype
,
346 const float *buf
, float *lpc
,
349 eval_lpcenv_or_interp(tctx
, ftype
, lpc
, buf
, size
/2, step
, 0);
350 eval_lpcenv_or_interp(tctx
, ftype
, lpc
+ size
/2, buf
, size
/2, 2*step
, 1);
352 interpolate(lpc
+size
/2-step
+1, lpc
[size
/2], lpc
[size
/2-step
], step
);
354 memset_float(lpc
+ size
- 2*step
+ 1, lpc
[size
- 2*step
], 2*step
- 1);
358 * Inverse quantization. Read CB coefficients for cb1 and cb2 from the
359 * bitstream, sum the corresponding vectors and write the result to *out
362 static void dequant(TwinContext
*tctx
, GetBitContext
*gb
, float *out
,
363 enum FrameType ftype
,
364 const int16_t *cb0
, const int16_t *cb1
, int cb_len
)
369 for (i
= 0; i
< tctx
->n_div
[ftype
]; i
++) {
373 const int16_t *tab0
, *tab1
;
374 int length
= tctx
->length
[ftype
][i
>= tctx
->length_change
[ftype
]];
375 int bitstream_second_part
= (i
>= tctx
->bits_main_spec_change
[ftype
]);
377 int bits
= tctx
->bits_main_spec
[0][ftype
][bitstream_second_part
];
383 tmp0
= get_bits(gb
, bits
);
385 bits
= tctx
->bits_main_spec
[1][ftype
][bitstream_second_part
];
393 tmp1
= get_bits(gb
, bits
);
395 tab0
= cb0
+ tmp0
*cb_len
;
396 tab1
= cb1
+ tmp1
*cb_len
;
398 for (j
= 0; j
< length
; j
++)
399 out
[tctx
->permut
[ftype
][pos
+j
]] = sign0
*tab0
[j
] + sign1
*tab1
[j
];
/**
 * Inverse mu-law expansion.
 *
 * The input is divided by clip and limited to [-1, 1]; its magnitude is then
 * expanded as ((1 + mu)^|y| - 1) / mu, the sign is restored and the result is
 * scaled back by clip.
 *
 * @param y    value to expand
 * @param clip full-scale amplitude; y/clip is clipped to [-1, 1]
 * @param mu   companding constant
 * @return the expanded value
 */
static inline float mulawinv(float y, float clip, float mu)
{
    y = av_clipf(y/clip, -1, 1);
    return clip * FFSIGN(y) * (exp(log(1+mu) * fabs(y)) - 1) / mu;
}
413 * Evaluate a*b/400 rounded to the nearest integer. When, for example,
414 * a*b == 200 and the nearest integer is ill-defined, use a table to emulate
415 * the following broken float-based implementation used by the binary decoder:
418 * static int very_broken_op(int a, int b)
420 * static float test; // Ugh, force gcc to do the division first...
423 * return b * test + 0.5;
427 * @note if this function is replaced by just ROUNDED_DIV(a*b,400.), the stddev
428 * between the original file (before encoding with Yamaha encoder) and the
429 * decoded output increases, which leads one to believe that the encoder expects
430 * exactly this broken calculation.
432 static int very_broken_op(int a
, int b
)
443 size
= tabs
[b
/5].size
;
444 rtab
= tabs
[b
/5].tab
;
445 return x
- rtab
[size
*av_log2(2*(x
- 1)/size
)+(x
- 1)%size
];
449 * Sum to data a periodic peak of a given period, width and shape.
451 * @param period the period of the peak divided by 400.0
453 static void add_peak(int period
, int width
, const float *shape
,
454 float ppc_gain
, float *speech
, int len
)
458 const float *shape_end
= shape
+ len
;
461 // First peak centered around zero
462 for (i
= 0; i
< width
/2; i
++)
463 speech
[i
] += ppc_gain
* *shape
++;
465 for (i
= 1; i
< ROUNDED_DIV(len
,width
) ; i
++) {
466 center
= very_broken_op(period
, i
);
467 for (j
= -width
/2; j
< (width
+1)/2; j
++)
468 speech
[j
+center
] += ppc_gain
* *shape
++;
471 // For the last block, be careful not to go beyond the end of the buffer
472 center
= very_broken_op(period
, i
);
473 for (j
= -width
/2; j
< (width
+ 1)/2 && shape
< shape_end
; j
++)
474 speech
[j
+center
] += ppc_gain
* *shape
++;
477 static void decode_ppc(TwinContext
*tctx
, int period_coef
, const float *shape
,
478 float ppc_gain
, float *speech
)
480 const ModeTab
*mtab
= tctx
->mtab
;
481 int isampf
= tctx
->avctx
->sample_rate
/1000;
482 int ibps
= tctx
->avctx
->bit_rate
/(1000 * tctx
->avctx
->channels
);
483 int min_period
= ROUNDED_DIV( 40*2*mtab
->size
, isampf
);
484 int max_period
= ROUNDED_DIV(6*40*2*mtab
->size
, isampf
);
485 int period_range
= max_period
- min_period
;
487 // This is actually the period multiplied by 400. It is just linearly coded
488 // between its maximum and minimum value.
489 int period
= min_period
+
490 ROUNDED_DIV(period_coef
*period_range
, (1 << mtab
->ppc_period_bit
) - 1);
493 if (isampf
== 22 && ibps
== 32) {
494 // For some unknown reason, NTT decided to code this case differently...
495 width
= ROUNDED_DIV((period
+ 800)* mtab
->peak_per2wid
, 400*mtab
->size
);
497 width
= (period
)* mtab
->peak_per2wid
/(400*mtab
->size
);
499 add_peak(period
, width
, shape
, ppc_gain
, speech
, mtab
->ppc_shape_len
);
502 static void dec_gain(TwinContext
*tctx
, GetBitContext
*gb
, enum FrameType ftype
,
505 const ModeTab
*mtab
= tctx
->mtab
;
507 int sub
= mtab
->fmode
[ftype
].sub
;
508 float step
= AMP_MAX
/ ((1 << GAIN_BITS
) - 1);
509 float sub_step
= SUB_AMP_MAX
/ ((1 << SUB_GAIN_BITS
) - 1);
511 if (ftype
== FT_LONG
) {
512 for (i
= 0; i
< tctx
->avctx
->channels
; i
++)
513 out
[i
] = (1./(1<<13)) *
514 mulawinv(step
* 0.5 + step
* get_bits(gb
, GAIN_BITS
),
517 for (i
= 0; i
< tctx
->avctx
->channels
; i
++) {
518 float val
= (1./(1<<23)) *
519 mulawinv(step
* 0.5 + step
* get_bits(gb
, GAIN_BITS
),
522 for (j
= 0; j
< sub
; j
++) {
524 val
*mulawinv(sub_step
* 0.5 +
525 sub_step
* get_bits(gb
, SUB_GAIN_BITS
),
526 SUB_AMP_MAX
, MULAW_MU
);
/**
 * Rearrange the LSP coefficients so that they have a minimum distance of
 * min_dist. This function does it exactly as described in section 3.2.4
 * of the G.729 specification (but interestingly is different from what the
 * reference decoder actually does).
 *
 * Each neighbouring pair closer than min_dist is replaced by two values
 * placed symmetrically around the pair's average, min_dist apart. Pairs are
 * visited once, from low to high index, so an adjustment can affect the
 * comparison made for the following pair.
 *
 * @param order    number of entries in lsp
 * @param lsp      coefficients, modified in place
 * @param min_dist spacing enforced between lsp[i-1] and lsp[i]
 */
static void rearrange_lsp(int order, float *lsp, float min_dist)
{
    int i;
    float min_dist2 = min_dist * 0.5;

    for (i = 1; i < order; i++) {
        if (lsp[i] - lsp[i-1] < min_dist) {
            float avg = (lsp[i] + lsp[i-1]) * 0.5;

            lsp[i-1] = avg - min_dist2;
            lsp[i  ] = avg + min_dist2;
        }
    }
}
551 static void decode_lsp(TwinContext
*tctx
, int lpc_idx1
, uint8_t *lpc_idx2
,
552 int lpc_hist_idx
, float *lsp
, float *hist
)
554 const ModeTab
*mtab
= tctx
->mtab
;
557 const float *cb
= mtab
->lspcodebook
;
558 const float *cb2
= cb
+ (1 << mtab
->lsp_bit1
)*mtab
->n_lsp
;
559 const float *cb3
= cb2
+ (1 << mtab
->lsp_bit2
)*mtab
->n_lsp
;
561 const int8_t funny_rounding
[4] = {
563 mtab
->lsp_split
== 4 ? -2 : 1,
564 mtab
->lsp_split
== 4 ? -2 : 1,
569 for (i
= 0; i
< mtab
->lsp_split
; i
++) {
570 int chunk_end
= ((i
+ 1)*mtab
->n_lsp
+ funny_rounding
[i
])/mtab
->lsp_split
;
571 for (; j
< chunk_end
; j
++)
572 lsp
[j
] = cb
[lpc_idx1
* mtab
->n_lsp
+ j
] +
573 cb2
[lpc_idx2
[i
] * mtab
->n_lsp
+ j
];
576 rearrange_lsp(mtab
->n_lsp
, lsp
, 0.0001);
578 for (i
= 0; i
< mtab
->n_lsp
; i
++) {
579 float tmp1
= 1. - cb3
[lpc_hist_idx
*mtab
->n_lsp
+ i
];
580 float tmp2
= hist
[i
] * cb3
[lpc_hist_idx
*mtab
->n_lsp
+ i
];
582 lsp
[i
] = lsp
[i
] * tmp1
+ tmp2
;
585 rearrange_lsp(mtab
->n_lsp
, lsp
, 0.0001);
586 rearrange_lsp(mtab
->n_lsp
, lsp
, 0.000095);
587 ff_sort_nearly_sorted_floats(lsp
, mtab
->n_lsp
);
590 static void dec_lpc_spectrum_inv(TwinContext
*tctx
, float *lsp
,
591 enum FrameType ftype
, float *lpc
)
594 int size
= tctx
->mtab
->size
/ tctx
->mtab
->fmode
[ftype
].sub
;
596 for (i
= 0; i
< tctx
->mtab
->n_lsp
; i
++)
597 lsp
[i
] = 2*cos(lsp
[i
]);
601 eval_lpcenv_2parts(tctx
, ftype
, lsp
, lpc
, size
, 8);
604 eval_lpcenv_2parts(tctx
, ftype
, lsp
, lpc
, size
, 2);
607 eval_lpcenv(tctx
, lsp
, lpc
);
612 static void imdct_and_window(TwinContext
*tctx
, enum FrameType ftype
, int wtype
,
613 float *in
, float *prev
, int ch
)
615 FFTContext
*mdct
= &tctx
->mdct_ctx
[ftype
];
616 const ModeTab
*mtab
= tctx
->mtab
;
617 int bsize
= mtab
->size
/ mtab
->fmode
[ftype
].sub
;
618 int size
= mtab
->size
;
619 float *buf1
= tctx
->tmp_buf
;
621 int wsize
; // Window size
622 float *out
= tctx
->curr_frame
+ 2*ch
*mtab
->size
;
627 static const uint8_t wtype_to_wsize
[] = {0, 0, 2, 2, 2, 1, 0, 1, 1};
628 int types_sizes
[] = {
629 mtab
->size
/ mtab
->fmode
[FT_LONG
].sub
,
630 mtab
->size
/ mtab
->fmode
[FT_MEDIUM
].sub
,
631 mtab
->size
/ (2*mtab
->fmode
[FT_SHORT
].sub
),
634 wsize
= types_sizes
[wtype_to_wsize
[wtype
]];
636 prev_buf
= prev
+ (size
- bsize
)/2;
638 for (j
= 0; j
< mtab
->fmode
[ftype
].sub
; j
++) {
639 int sub_wtype
= ftype
== FT_MEDIUM
? 8 : wtype
;
641 if (!j
&& wtype
== 4)
643 else if (j
== mtab
->fmode
[ftype
].sub
-1 && wtype
== 7)
646 wsize
= types_sizes
[wtype_to_wsize
[sub_wtype
]];
648 mdct
->imdct_half(mdct
, buf1
+ bsize
*j
, in
+ bsize
*j
);
650 tctx
->fdsp
.vector_fmul_window(out2
, prev_buf
+ (bsize
-wsize
) / 2,
652 ff_sine_windows
[av_log2(wsize
)],
656 memcpy(out2
, buf1
+ bsize
*j
+ wsize
/2, (bsize
- wsize
/2)*sizeof(float));
658 out2
+= ftype
== FT_MEDIUM
? (bsize
-wsize
)/2 : bsize
- wsize
;
660 prev_buf
= buf1
+ bsize
*j
+ bsize
/2;
663 tctx
->last_block_pos
[ch
] = (size
+ first_wsize
)/2;
666 static void imdct_output(TwinContext
*tctx
, enum FrameType ftype
, int wtype
,
669 const ModeTab
*mtab
= tctx
->mtab
;
671 float *prev_buf
= tctx
->prev_frame
+ tctx
->last_block_pos
[0];
674 for (i
= 0; i
< tctx
->avctx
->channels
; i
++) {
675 imdct_and_window(tctx
, ftype
, wtype
,
676 tctx
->spectrum
+ i
*mtab
->size
,
677 prev_buf
+ 2*i
*mtab
->size
,
684 size2
= tctx
->last_block_pos
[0];
685 size1
= mtab
->size
- size2
;
687 memcpy(&out
[0][0 ], prev_buf
, size1
* sizeof(out
[0][0]));
688 memcpy(&out
[0][size1
], tctx
->curr_frame
, size2
* sizeof(out
[0][0]));
690 if (tctx
->avctx
->channels
== 2) {
691 memcpy(&out
[1][0], &prev_buf
[2*mtab
->size
], size1
* sizeof(out
[1][0]));
692 memcpy(&out
[1][size1
], &tctx
->curr_frame
[2*mtab
->size
], size2
* sizeof(out
[1][0]));
693 tctx
->fdsp
.butterflies_float(out
[0], out
[1], mtab
->size
);
697 static void dec_bark_env(TwinContext
*tctx
, const uint8_t *in
, int use_hist
,
698 int ch
, float *out
, float gain
, enum FrameType ftype
)
700 const ModeTab
*mtab
= tctx
->mtab
;
702 float *hist
= tctx
->bark_hist
[ftype
][ch
];
703 float val
= ((const float []) {0.4, 0.35, 0.28})[ftype
];
704 int bark_n_coef
= mtab
->fmode
[ftype
].bark_n_coef
;
705 int fw_cb_len
= mtab
->fmode
[ftype
].bark_env_size
/ bark_n_coef
;
708 for (i
= 0; i
< fw_cb_len
; i
++)
709 for (j
= 0; j
< bark_n_coef
; j
++, idx
++) {
711 mtab
->fmode
[ftype
].bark_cb
[fw_cb_len
*in
[j
] + i
] * (1./4096);
712 float st
= use_hist
?
713 (1. - val
) * tmp2
+ val
*hist
[idx
] + 1. : tmp2
+ 1.;
716 if (st
< -1.) st
= 1.;
718 memset_float(out
, st
* gain
, mtab
->fmode
[ftype
].bark_tab
[idx
]);
719 out
+= mtab
->fmode
[ftype
].bark_tab
[idx
];
724 static void read_and_decode_spectrum(TwinContext
*tctx
, GetBitContext
*gb
,
725 float *out
, enum FrameType ftype
)
727 const ModeTab
*mtab
= tctx
->mtab
;
728 int channels
= tctx
->avctx
->channels
;
729 int sub
= mtab
->fmode
[ftype
].sub
;
730 int block_size
= mtab
->size
/ sub
;
731 float gain
[CHANNELS_MAX
*SUBBLOCKS_MAX
];
732 float ppc_shape
[PPC_SHAPE_LEN_MAX
* CHANNELS_MAX
* 4];
733 uint8_t bark1
[CHANNELS_MAX
][SUBBLOCKS_MAX
][BARK_N_COEF_MAX
];
734 uint8_t bark_use_hist
[CHANNELS_MAX
][SUBBLOCKS_MAX
];
736 uint8_t lpc_idx1
[CHANNELS_MAX
];
737 uint8_t lpc_idx2
[CHANNELS_MAX
][LSP_SPLIT_MAX
];
738 uint8_t lpc_hist_idx
[CHANNELS_MAX
];
742 dequant(tctx
, gb
, out
, ftype
,
743 mtab
->fmode
[ftype
].cb0
, mtab
->fmode
[ftype
].cb1
,
744 mtab
->fmode
[ftype
].cb_len_read
);
746 for (i
= 0; i
< channels
; i
++)
747 for (j
= 0; j
< sub
; j
++)
748 for (k
= 0; k
< mtab
->fmode
[ftype
].bark_n_coef
; k
++)
750 get_bits(gb
, mtab
->fmode
[ftype
].bark_n_bit
);
752 for (i
= 0; i
< channels
; i
++)
753 for (j
= 0; j
< sub
; j
++)
754 bark_use_hist
[i
][j
] = get_bits1(gb
);
756 dec_gain(tctx
, gb
, ftype
, gain
);
758 for (i
= 0; i
< channels
; i
++) {
759 lpc_hist_idx
[i
] = get_bits(gb
, tctx
->mtab
->lsp_bit0
);
760 lpc_idx1
[i
] = get_bits(gb
, tctx
->mtab
->lsp_bit1
);
762 for (j
= 0; j
< tctx
->mtab
->lsp_split
; j
++)
763 lpc_idx2
[i
][j
] = get_bits(gb
, tctx
->mtab
->lsp_bit2
);
766 if (ftype
== FT_LONG
) {
767 int cb_len_p
= (tctx
->n_div
[3] + mtab
->ppc_shape_len
*channels
- 1)/
769 dequant(tctx
, gb
, ppc_shape
, FT_PPC
, mtab
->ppc_shape_cb
,
770 mtab
->ppc_shape_cb
+ cb_len_p
*PPC_SHAPE_CB_SIZE
, cb_len_p
);
773 for (i
= 0; i
< channels
; i
++) {
774 float *chunk
= out
+ mtab
->size
* i
;
775 float lsp
[LSP_COEFS_MAX
];
777 for (j
= 0; j
< sub
; j
++) {
778 dec_bark_env(tctx
, bark1
[i
][j
], bark_use_hist
[i
][j
], i
,
779 tctx
->tmp_buf
, gain
[sub
*i
+j
], ftype
);
781 tctx
->fdsp
.vector_fmul(chunk
+ block_size
*j
, chunk
+ block_size
*j
,
782 tctx
->tmp_buf
, block_size
);
786 if (ftype
== FT_LONG
) {
787 float pgain_step
= 25000. / ((1 << mtab
->pgain_bit
) - 1);
788 int p_coef
= get_bits(gb
, tctx
->mtab
->ppc_period_bit
);
789 int g_coef
= get_bits(gb
, tctx
->mtab
->pgain_bit
);
791 mulawinv(pgain_step
*g_coef
+ pgain_step
/2, 25000., PGAIN_MU
);
793 decode_ppc(tctx
, p_coef
, ppc_shape
+ i
*mtab
->ppc_shape_len
, v
,
797 decode_lsp(tctx
, lpc_idx1
[i
], lpc_idx2
[i
], lpc_hist_idx
[i
], lsp
,
800 dec_lpc_spectrum_inv(tctx
, lsp
, ftype
, tctx
->tmp_buf
);
802 for (j
= 0; j
< mtab
->fmode
[ftype
].sub
; j
++) {
803 tctx
->fdsp
.vector_fmul(chunk
, chunk
, tctx
->tmp_buf
, block_size
);
809 static int twin_decode_frame(AVCodecContext
* avctx
, void *data
,
810 int *got_frame_ptr
, AVPacket
*avpkt
)
812 AVFrame
*frame
= data
;
813 const uint8_t *buf
= avpkt
->data
;
814 int buf_size
= avpkt
->size
;
815 TwinContext
*tctx
= avctx
->priv_data
;
817 const ModeTab
*mtab
= tctx
->mtab
;
819 enum FrameType ftype
;
820 int window_type
, ret
;
821 static const enum FrameType wtype_to_ftype_table
[] = {
822 FT_LONG
, FT_LONG
, FT_SHORT
, FT_LONG
,
823 FT_MEDIUM
, FT_LONG
, FT_LONG
, FT_MEDIUM
, FT_MEDIUM
826 if (buf_size
*8 < avctx
->bit_rate
*mtab
->size
/avctx
->sample_rate
+ 8) {
827 av_log(avctx
, AV_LOG_ERROR
,
828 "Frame too small (%d bytes). Truncated file?\n", buf_size
);
829 return AVERROR(EINVAL
);
832 /* get output buffer */
833 if (tctx
->discarded_packets
>= 2) {
834 frame
->nb_samples
= mtab
->size
;
835 if ((ret
= ff_get_buffer(avctx
, frame
, 0)) < 0) {
836 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
839 out
= (float **)frame
->extended_data
;
842 init_get_bits(&gb
, buf
, buf_size
* 8);
843 skip_bits(&gb
, get_bits(&gb
, 8));
844 window_type
= get_bits(&gb
, WINDOW_TYPE_BITS
);
846 if (window_type
> 8) {
847 av_log(avctx
, AV_LOG_ERROR
, "Invalid window type, broken sample?\n");
851 ftype
= wtype_to_ftype_table
[window_type
];
853 read_and_decode_spectrum(tctx
, &gb
, tctx
->spectrum
, ftype
);
855 imdct_output(tctx
, ftype
, window_type
, out
);
857 FFSWAP(float*, tctx
->curr_frame
, tctx
->prev_frame
);
859 if (tctx
->discarded_packets
< 2) {
860 tctx
->discarded_packets
++;
871 * Init IMDCT and windowing tables
873 static av_cold
int init_mdct_win(TwinContext
*tctx
)
876 const ModeTab
*mtab
= tctx
->mtab
;
877 int size_s
= mtab
->size
/ mtab
->fmode
[FT_SHORT
].sub
;
878 int size_m
= mtab
->size
/ mtab
->fmode
[FT_MEDIUM
].sub
;
879 int channels
= tctx
->avctx
->channels
;
880 float norm
= channels
== 1 ? 2. : 1.;
882 for (i
= 0; i
< 3; i
++) {
883 int bsize
= tctx
->mtab
->size
/tctx
->mtab
->fmode
[i
].sub
;
884 if ((ret
= ff_mdct_init(&tctx
->mdct_ctx
[i
], av_log2(bsize
) + 1, 1,
885 -sqrt(norm
/bsize
) / (1<<15))))
889 FF_ALLOC_OR_GOTO(tctx
->avctx
, tctx
->tmp_buf
,
890 mtab
->size
* sizeof(*tctx
->tmp_buf
), alloc_fail
);
892 FF_ALLOC_OR_GOTO(tctx
->avctx
, tctx
->spectrum
,
893 2 * mtab
->size
* channels
* sizeof(*tctx
->spectrum
),
895 FF_ALLOC_OR_GOTO(tctx
->avctx
, tctx
->curr_frame
,
896 2 * mtab
->size
* channels
* sizeof(*tctx
->curr_frame
),
898 FF_ALLOC_OR_GOTO(tctx
->avctx
, tctx
->prev_frame
,
899 2 * mtab
->size
* channels
* sizeof(*tctx
->prev_frame
),
902 for (i
= 0; i
< 3; i
++) {
903 int m
= 4*mtab
->size
/mtab
->fmode
[i
].sub
;
904 double freq
= 2*M_PI
/m
;
905 FF_ALLOC_OR_GOTO(tctx
->avctx
, tctx
->cos_tabs
[i
],
906 (m
/ 4) * sizeof(*tctx
->cos_tabs
[i
]), alloc_fail
);
908 for (j
= 0; j
<= m
/8; j
++)
909 tctx
->cos_tabs
[i
][j
] = cos((2*j
+ 1)*freq
);
910 for (j
= 1; j
< m
/8; j
++)
911 tctx
->cos_tabs
[i
][m
/4-j
] = tctx
->cos_tabs
[i
][j
];
915 ff_init_ff_sine_windows(av_log2(size_m
));
916 ff_init_ff_sine_windows(av_log2(size_s
/2));
917 ff_init_ff_sine_windows(av_log2(mtab
->size
));
921 return AVERROR(ENOMEM
);
925 * Interpret the data as if it were a num_blocks x line_len[0] matrix and for
926 * each line do a cyclic permutation, i.e.
927 * abcdefghijklm -> defghijklmabc
928 * where the amount to be shifted is evaluated depending on the column.
930 static void permutate_in_line(int16_t *tab
, int num_vect
, int num_blocks
,
932 const uint8_t line_len
[2], int length_div
,
933 enum FrameType ftype
)
938 for (i
= 0; i
< line_len
[0]; i
++) {
941 if (num_blocks
== 1 ||
942 (ftype
== FT_LONG
&& num_vect
% num_blocks
) ||
943 (ftype
!= FT_LONG
&& num_vect
& 1 ) ||
946 } else if (ftype
== FT_LONG
) {
951 for (j
= 0; j
< num_vect
&& (j
+num_vect
*i
< block_size
*num_blocks
); j
++)
952 tab
[i
*num_vect
+j
] = i
*num_vect
+ (j
+ shift
) % num_vect
;
957 * Interpret the input data as in the following table:
968 * and transpose it, giving the output
969 * aiqxbjr1cks2dlt3emu4fvn5gow6hp
/**
 * Transpose a table stored row by row with num_vect entries per row.
 *
 * Column i of the input is read top to bottom (in[i], in[num_vect + i], ...)
 * and appended to the output. Columns with index below length_div contribute
 * line_len[0] entries, the remaining columns contribute line_len[1] entries.
 *
 * @param out        destination, filled contiguously from index 0
 * @param in         source table, read with a stride of num_vect
 * @param num_vect   number of columns in the source table
 * @param line_len   column heights before and from length_div onwards
 * @param length_div index of the first column that uses line_len[1]
 */
static void transpose_perm(int16_t *out, int16_t *in, int num_vect,
                           const uint8_t line_len[2], int length_div)
{
    int i, j;
    int cont = 0;

    for (i = 0; i < num_vect; i++) {
        for (j = 0; j < line_len[i >= length_div]; j++)
            out[cont++] = in[j*num_vect + i];
    }
}
/**
 * Convert interleaved indices into block-linear indices.
 *
 * An input value v is taken as position v / n_blocks inside block
 * v % n_blocks and mapped to block_size * (v % n_blocks) + v / n_blocks,
 * where block_size is size / n_blocks.
 *
 * Each element is read before the same index is written, so out and in may
 * point to the same buffer, as they do in construct_perm_table().
 *
 * @param out      destination table of size entries
 * @param in       source table of size entries
 * @param n_blocks number of blocks; must be non-zero
 * @param size     total number of entries
 */
static void linear_perm(int16_t *out, int16_t *in, int n_blocks, int size)
{
    int block_size = size/n_blocks;
    int i;

    for (i = 0; i < size; i++)
        out[i] = block_size * (in[i] % n_blocks) + in[i] / n_blocks;
}
990 static av_cold
void construct_perm_table(TwinContext
*tctx
,enum FrameType ftype
)
993 const ModeTab
*mtab
= tctx
->mtab
;
995 int16_t *tmp_perm
= (int16_t *) tctx
->tmp_buf
;
997 if (ftype
== FT_PPC
) {
998 size
= tctx
->avctx
->channels
;
999 block_size
= mtab
->ppc_shape_len
;
1001 size
= tctx
->avctx
->channels
* mtab
->fmode
[ftype
].sub
;
1002 block_size
= mtab
->size
/ mtab
->fmode
[ftype
].sub
;
1005 permutate_in_line(tmp_perm
, tctx
->n_div
[ftype
], size
,
1006 block_size
, tctx
->length
[ftype
],
1007 tctx
->length_change
[ftype
], ftype
);
1009 transpose_perm(tctx
->permut
[ftype
], tmp_perm
, tctx
->n_div
[ftype
],
1010 tctx
->length
[ftype
], tctx
->length_change
[ftype
]);
1012 linear_perm(tctx
->permut
[ftype
], tctx
->permut
[ftype
], size
,
1016 static av_cold
void init_bitstream_params(TwinContext
*tctx
)
1018 const ModeTab
*mtab
= tctx
->mtab
;
1019 int n_ch
= tctx
->avctx
->channels
;
1020 int total_fr_bits
= tctx
->avctx
->bit_rate
*mtab
->size
/
1021 tctx
->avctx
->sample_rate
;
1023 int lsp_bits_per_block
= n_ch
*(mtab
->lsp_bit0
+ mtab
->lsp_bit1
+
1024 mtab
->lsp_split
*mtab
->lsp_bit2
);
1026 int ppc_bits
= n_ch
*(mtab
->pgain_bit
+ mtab
->ppc_shape_bit
+
1027 mtab
->ppc_period_bit
);
1029 int bsize_no_main_cb
[3];
1032 enum FrameType frametype
;
1034 for (i
= 0; i
< 3; i
++)
1035 // +1 for history usage switch
1036 bse_bits
[i
] = n_ch
*
1037 (mtab
->fmode
[i
].bark_n_coef
* mtab
->fmode
[i
].bark_n_bit
+ 1);
1039 bsize_no_main_cb
[2] = bse_bits
[2] + lsp_bits_per_block
+ ppc_bits
+
1040 WINDOW_TYPE_BITS
+ n_ch
*GAIN_BITS
;
1042 for (i
= 0; i
< 2; i
++)
1043 bsize_no_main_cb
[i
] =
1044 lsp_bits_per_block
+ n_ch
*GAIN_BITS
+ WINDOW_TYPE_BITS
+
1045 mtab
->fmode
[i
].sub
*(bse_bits
[i
] + n_ch
*SUB_GAIN_BITS
);
1047 // The remaining bits are all used for the main spectrum coefficients
1048 for (i
= 0; i
< 4; i
++) {
1051 int rounded_up
, rounded_down
, num_rounded_down
, num_rounded_up
;
1053 bit_size
= n_ch
* mtab
->ppc_shape_bit
;
1054 vect_size
= n_ch
* mtab
->ppc_shape_len
;
1056 bit_size
= total_fr_bits
- bsize_no_main_cb
[i
];
1057 vect_size
= n_ch
* mtab
->size
;
1060 tctx
->n_div
[i
] = (bit_size
+ 13) / 14;
1062 rounded_up
= (bit_size
+ tctx
->n_div
[i
] - 1)/tctx
->n_div
[i
];
1063 rounded_down
= (bit_size
)/tctx
->n_div
[i
];
1064 num_rounded_down
= rounded_up
* tctx
->n_div
[i
] - bit_size
;
1065 num_rounded_up
= tctx
->n_div
[i
] - num_rounded_down
;
1066 tctx
->bits_main_spec
[0][i
][0] = (rounded_up
+ 1)/2;
1067 tctx
->bits_main_spec
[1][i
][0] = (rounded_up
)/2;
1068 tctx
->bits_main_spec
[0][i
][1] = (rounded_down
+ 1)/2;
1069 tctx
->bits_main_spec
[1][i
][1] = (rounded_down
)/2;
1070 tctx
->bits_main_spec_change
[i
] = num_rounded_up
;
1072 rounded_up
= (vect_size
+ tctx
->n_div
[i
] - 1)/tctx
->n_div
[i
];
1073 rounded_down
= (vect_size
)/tctx
->n_div
[i
];
1074 num_rounded_down
= rounded_up
* tctx
->n_div
[i
] - vect_size
;
1075 num_rounded_up
= tctx
->n_div
[i
] - num_rounded_down
;
1076 tctx
->length
[i
][0] = rounded_up
;
1077 tctx
->length
[i
][1] = rounded_down
;
1078 tctx
->length_change
[i
] = num_rounded_up
;
1081 for (frametype
= FT_SHORT
; frametype
<= FT_PPC
; frametype
++)
1082 construct_perm_table(tctx
, frametype
);
1085 static av_cold
int twin_decode_close(AVCodecContext
*avctx
)
1087 TwinContext
*tctx
= avctx
->priv_data
;
1090 for (i
= 0; i
< 3; i
++) {
1091 ff_mdct_end(&tctx
->mdct_ctx
[i
]);
1092 av_free(tctx
->cos_tabs
[i
]);
1096 av_free(tctx
->curr_frame
);
1097 av_free(tctx
->spectrum
);
1098 av_free(tctx
->prev_frame
);
1099 av_free(tctx
->tmp_buf
);
1104 static av_cold
int twin_decode_init(AVCodecContext
*avctx
)
1107 TwinContext
*tctx
= avctx
->priv_data
;
1110 tctx
->avctx
= avctx
;
1111 avctx
->sample_fmt
= AV_SAMPLE_FMT_FLTP
;
1113 if (!avctx
->extradata
|| avctx
->extradata_size
< 12) {
1114 av_log(avctx
, AV_LOG_ERROR
, "Missing or incomplete extradata\n");
1115 return AVERROR_INVALIDDATA
;
1117 avctx
->channels
= AV_RB32(avctx
->extradata
) + 1;
1118 avctx
->bit_rate
= AV_RB32(avctx
->extradata
+ 4) * 1000;
1119 isampf
= AV_RB32(avctx
->extradata
+ 8);
1121 if (isampf
< 8 || isampf
> 44) {
1122 av_log(avctx
, AV_LOG_ERROR
, "Unsupported sample rate\n");
1123 return AVERROR_INVALIDDATA
;
1126 case 44: avctx
->sample_rate
= 44100; break;
1127 case 22: avctx
->sample_rate
= 22050; break;
1128 case 11: avctx
->sample_rate
= 11025; break;
1129 default: avctx
->sample_rate
= isampf
* 1000; break;
1132 if (avctx
->channels
<= 0 || avctx
->channels
> CHANNELS_MAX
) {
1133 av_log(avctx
, AV_LOG_ERROR
, "Unsupported number of channels: %i\n",
1137 avctx
->channel_layout
= avctx
->channels
== 1 ? AV_CH_LAYOUT_MONO
:
1138 AV_CH_LAYOUT_STEREO
;
1140 ibps
= avctx
->bit_rate
/ (1000 * avctx
->channels
);
1142 switch ((isampf
<< 8) + ibps
) {
1143 case (8 <<8) + 8: tctx
->mtab
= &mode_08_08
; break;
1144 case (11<<8) + 8: tctx
->mtab
= &mode_11_08
; break;
1145 case (11<<8) + 10: tctx
->mtab
= &mode_11_10
; break;
1146 case (16<<8) + 16: tctx
->mtab
= &mode_16_16
; break;
1147 case (22<<8) + 20: tctx
->mtab
= &mode_22_20
; break;
1148 case (22<<8) + 24: tctx
->mtab
= &mode_22_24
; break;
1149 case (22<<8) + 32: tctx
->mtab
= &mode_22_32
; break;
1150 case (44<<8) + 40: tctx
->mtab
= &mode_44_40
; break;
1151 case (44<<8) + 48: tctx
->mtab
= &mode_44_48
; break;
1153 av_log(avctx
, AV_LOG_ERROR
, "This version does not support %d kHz - %d kbit/s/ch mode.\n", isampf
, isampf
);
1157 avpriv_float_dsp_init(&tctx
->fdsp
, avctx
->flags
& CODEC_FLAG_BITEXACT
);
1158 if ((ret
= init_mdct_win(tctx
))) {
1159 av_log(avctx
, AV_LOG_ERROR
, "Error initializing MDCT\n");
1160 twin_decode_close(avctx
);
1163 init_bitstream_params(tctx
);
1165 memset_float(tctx
->bark_hist
[0][0], 0.1, FF_ARRAY_ELEMS(tctx
->bark_hist
));
1170 AVCodec ff_twinvq_decoder
= {
1172 .type
= AVMEDIA_TYPE_AUDIO
,
1173 .id
= AV_CODEC_ID_TWINVQ
,
1174 .priv_data_size
= sizeof(TwinContext
),
1175 .init
= twin_decode_init
,
1176 .close
= twin_decode_close
,
1177 .decode
= twin_decode_frame
,
1178 .capabilities
= CODEC_CAP_DR1
,
1179 .long_name
= NULL_IF_CONFIG_SMALL("VQF TwinVQ"),
1180 .sample_fmts
= (const enum AVSampleFormat
[]) { AV_SAMPLE_FMT_FLTP
,
1181 AV_SAMPLE_FMT_NONE
},