2 * WMA compatible encoder
3 * Copyright (c) 2007 Michael Niedermayer
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/attributes.h"
/*
 * Encoder initialisation callback.
 *
 * NOTE(review): the embedded original line numbers in this listing skip,
 * so some lines (braces, a few statements) are not visible here. The
 * comments below describe only what the visible lines do.
 *
 * Visible behaviour:
 *  - returns AVERROR(EINVAL) when avctx->channels > MAX_CHANNELS, when
 *    avctx->sample_rate > 48000, or when avctx->bit_rate < 24 * 1000;
 *  - allocates extradata: 4 bytes (av_malloc) for AV_CODEC_ID_WMAV1,
 *    10 zeroed bytes (av_mallocz) for AV_CODEC_ID_WMAV2, and writes
 *    flags1/flags2 into it with AV_WL16/AV_WL32; an AVERROR(ENOMEM)
 *    return follows each allocation (its guarding condition is not
 *    visible in this listing);
 *  - derives use_exp_vlc, use_bit_reservoir and use_variable_block_len
 *    from bits 0x0001, 0x0002 and 0x0004 of flags2;
 *  - calls ff_wma_init() and initialises one MDCT context per block size;
 *  - computes avctx->block_align, then rewrites avctx->bit_rate from it;
 *  - sets avctx->frame_size and avctx->initial_padding to s->frame_len.
 */
32 static av_cold
int encode_init(AVCodecContext
*avctx
)
34 WMACodecContext
*s
= avctx
->priv_data
;
35 int i
, flags1
, flags2
, block_align
;
/* NOTE(review): this format string has no trailing "\n", unlike the
 * "bitrate too low" message further down. */
40 if (avctx
->channels
> MAX_CHANNELS
) {
41 av_log(avctx
, AV_LOG_ERROR
,
42 "too many channels: got %i, need %i or fewer",
43 avctx
->channels
, MAX_CHANNELS
);
44 return AVERROR(EINVAL
);
/* NOTE(review): this format string also lacks a trailing "\n". */
47 if (avctx
->sample_rate
> 48000) {
48 av_log(avctx
, AV_LOG_ERROR
, "sample rate is too high: %d > 48kHz",
50 return AVERROR(EINVAL
);
53 if (avctx
->bit_rate
< 24 * 1000) {
54 av_log(avctx
, AV_LOG_ERROR
,
55 "bitrate too low: got %i, need 24000 or higher\n",
57 return AVERROR(EINVAL
);
60 /* extract flag info */
/* The extradata layout depends on the codec id: 4 bytes for WMAV1,
 * 10 bytes for WMAV2. */
63 if (avctx
->codec
->id
== AV_CODEC_ID_WMAV1
) {
64 extradata
= av_malloc(4);
66 return AVERROR(ENOMEM
);
67 avctx
->extradata_size
= 4;
68 AV_WL16(extradata
, flags1
);
69 AV_WL16(extradata
+ 2, flags2
);
70 } else if (avctx
->codec
->id
== AV_CODEC_ID_WMAV2
) {
71 extradata
= av_mallocz(10);
73 return AVERROR(ENOMEM
);
74 avctx
->extradata_size
= 10;
75 AV_WL32(extradata
, flags1
);
76 AV_WL16(extradata
+ 4, flags2
);
80 avctx
->extradata
= extradata
;
/* Feature switches are taken from individual bits of flags2. */
81 s
->use_exp_vlc
= flags2
& 0x0001;
82 s
->use_bit_reservoir
= flags2
& 0x0002;
83 s
->use_variable_block_len
= flags2
& 0x0004;
84 if (avctx
->channels
== 2)
87 ff_wma_init(avctx
, flags2
);
/* One MDCT context per block size; the size argument shrinks by one
 * bit for each successive index i. */
90 for (i
= 0; i
< s
->nb_block_sizes
; i
++)
91 ff_mdct_init(&s
->mdct_ctx
[i
], s
->frame_len_bits
- i
+ 1, 0, 1.0);
/* block_align = bit_rate * frame_len / (sample_rate * 8); the int64_t
 * cast makes the multiplication 64-bit. The result is capped at
 * MAX_CODED_SUPERFRAME_SIZE. */
93 block_align
= avctx
->bit_rate
* (int64_t) s
->frame_len
/
94 (avctx
->sample_rate
* 8);
95 block_align
= FFMIN(block_align
, MAX_CODED_SUPERFRAME_SIZE
);
96 avctx
->block_align
= block_align
;
/* bit_rate is recomputed from the (possibly capped) block_align; the
 * divisor of this expression is not visible in this listing. */
97 avctx
->bit_rate
= avctx
->block_align
* 8LL * avctx
->sample_rate
/
99 avctx
->frame_size
= avctx
->initial_padding
= s
->frame_len
;
/*
 * Windows the input samples of each channel and runs the forward MDCT.
 *
 * avctx: codec context; avctx->priv_data is the WMACodecContext.
 * frame: input frame; frame->extended_data is accessed as an array of
 *        per-channel float pointers and frame->nb_samples as the length.
 *
 * For every channel the visible code:
 *  1. copies window_len floats from s->frame_out[ch] into s->output;
 *  2. writes audio[ch] scaled by n = 2.0 * 32768.0 / window_len into
 *     s->frame_out[ch];
 *  3. calls vector_fmul_reverse() with destination &s->output[window_len]
 *     (the remaining arguments are not visible in this listing);
 *  4. multiplies s->frame_out[ch] in place by the window table win;
 *  5. calls mdct_calc() with s->coefs[ch] as destination and s->output as
 *     source.
 *
 * NOTE(review): the declaration of ch and some other lines are missing
 * from this listing.
 */
104 static void apply_window_and_mdct(AVCodecContext
*avctx
, const AVFrame
*frame
)
106 WMACodecContext
*s
= avctx
->priv_data
;
107 float **audio
= (float **) frame
->extended_data
;
108 int len
= frame
->nb_samples
;
/* The same index selects both the MDCT context and the window table. */
109 int window_index
= s
->frame_len_bits
- s
->block_len_bits
;
110 FFTContext
*mdct
= &s
->mdct_ctx
[window_index
];
112 const float *win
= s
->windows
[window_index
];
113 int window_len
= 1 << s
->block_len_bits
;
114 float n
= 2.0 * 32768.0 / window_len
;
116 for (ch
= 0; ch
< avctx
->channels
; ch
++) {
117 memcpy(s
->output
, s
->frame_out
[ch
], window_len
* sizeof(*s
->output
));
118 s
->fdsp
.vector_fmul_scalar(s
->frame_out
[ch
], audio
[ch
], n
, len
);
119 s
->fdsp
.vector_fmul_reverse(&s
->output
[window_len
], s
->frame_out
[ch
],
121 s
->fdsp
.vector_fmul(s
->frame_out
[ch
], s
->frame_out
[ch
], win
, len
);
122 mdct
->mdct_calc(mdct
, s
->coefs
[ch
], s
->output
);
/*
 * Fills the exponent state of channel ch from the values in exp_param.
 *
 * s:         encoder context.
 * ch:        channel index used to select s->exponents[ch] and
 *            s->max_exponent[ch].
 * exp_param: exponent values, consumed sequentially (*exp_param++).
 *
 * Each consumed value is converted with v = pow(10, value * (1.0 / 16.0)),
 * the running maximum of v is kept in max_scale, and that maximum is
 * stored in s->max_exponent[ch] at the end.
 *
 * NOTE(review): the loops that walk ptr / q / q_end and the initial value
 * of max_scale are not visible in this listing.
 */
126 // FIXME use for decoding too
127 static void init_exp(WMACodecContext
*s
, int ch
, const int *exp_param
)
131 float v
, *q
, max_scale
, *q_end
;
/* Band table for the current block size, and the [q, q_end) range
 * spanning block_len entries of this channel's exponents. */
133 ptr
= s
->exponent_bands
[s
->frame_len_bits
- s
->block_len_bits
];
134 q
= s
->exponents
[ch
];
135 q_end
= q
+ s
->block_len
;
138 /* XXX: use a table */
139 v
= pow(10, *exp_param
++ *(1.0 / 16.0));
140 max_scale
= FFMAX(max_scale
, v
);
146 s
->max_exponent
[ch
] = max_scale
;
/*
 * Writes the exponents in exp_param to the bitstream s->pb.
 *
 * s:         encoder context holding the bit writer s->pb.
 * ch:        channel index used to select s->exponents[ch].
 * exp_param: exponent values, consumed sequentially (*exp_param++).
 *
 * Visible behaviour:
 *  - when s->version == 1 the first exponent is read into last_exp and
 *    written as the 5-bit value (last_exp - 10), asserted to be in
 *    [0, 32);
 *  - each following exponent is coded as code = exp - last_exp + 60,
 *    asserted to be in [0, 120), and written using the
 *    ff_aac_scalefactor_bits / ff_aac_scalefactor_code tables.
 *
 * NOTE(review): local declarations, the loops and the update of last_exp
 * are not visible in this listing.
 */
149 static void encode_exp_vlc(WMACodecContext
*s
, int ch
, const int *exp_param
)
155 ptr
= s
->exponent_bands
[s
->frame_len_bits
- s
->block_len_bits
];
156 q
= s
->exponents
[ch
];
157 q_end
= q
+ s
->block_len
;
158 if (s
->version
== 1) {
159 last_exp
= *exp_param
++;
160 assert(last_exp
- 10 >= 0 && last_exp
- 10 < 32);
161 put_bits(&s
->pb
, 5, last_exp
- 10);
/* Offset of 60 keeps the difference non-negative as a table index. */
166 int exp
= *exp_param
++;
167 int code
= exp
- last_exp
+ 60;
168 assert(code
>= 0 && code
< 120);
169 put_bits(&s
->pb
, ff_aac_scalefactor_bits
[code
],
170 ff_aac_scalefactor_code
[code
]);
171 /* XXX: use a table */
/*
 * Quantises the coefficients in src_coefs and writes one block to s->pb.
 *
 * NOTE(review): many lines of this function (part of the parameter list,
 * declarations, braces, several statements and the return) are missing
 * from this listing; the summary covers only the visible lines.
 *
 * Visible steps, in order:
 *  - variable block length is not implemented (assert(0)); otherwise
 *    next/prev/current block_len_bits are all set to s->frame_len_bits;
 *  - s->block_len = 1 << s->block_len_bits and
 *    bsize = s->frame_len_bits - s->block_len_bits;
 *  - with two channels, one bit carrying !!s->ms_stereo is written;
 *  - every channel is marked coded and its exponents are initialised
 *    from the static fixed_exp table through init_exp();
 *  - each coefficient is divided by exponents[i] * mult, where
 *    mult = pow(10, total_gain * 0.05) / s->max_exponent[ch], compared
 *    against the range [-32768, 32767], and stored in s->coefs1[ch][i]
 *    via lrint();
 *  - one channel_coded bit is written per channel;
 *  - total_gain - 1 is written in 7-bit pieces: 127 is emitted while the
 *    remaining value is >= 127, then the remainder;
 *  - coef_nb_bits is obtained from ff_wma_total_gain_to_bits(total_gain);
 *  - with noise coding, a 0 bit is written for each high band and the
 *    band size is subtracted from nb_coefs[ch];
 *  - the parse_exponents bit is written only when block_len_bits differs
 *    from frame_len_bits; exponents are written with encode_exp_vlc()
 *    when s->use_exp_vlc is set (the other path is assert(0));
 *  - coefficients are written as run/level codes from
 *    s->coef_vlcs[tindex], followed by a sign bit (level < 0);
 *  - after the coefficients of a channel, table entry 1 of the VLC is
 *    written (presumably an end-of-block marker; confirm against the
 *    decoder);
 *  - for version 1 with at least two channels the bit writer is aligned.
 */
177 static int encode_block(WMACodecContext
*s
, float (*src_coefs
)[BLOCK_MAX_SIZE
],
180 int v
, bsize
, ch
, coef_nb_bits
, parse_exponents
;
182 int nb_coefs
[MAX_CHANNELS
];
183 static const int fixed_exp
[25] = {
191 // FIXME remove duplication relative to decoder
192 if (s
->use_variable_block_len
) {
193 assert(0); // FIXME not implemented
195 /* fixed block len */
196 s
->next_block_len_bits
= s
->frame_len_bits
;
197 s
->prev_block_len_bits
= s
->frame_len_bits
;
198 s
->block_len_bits
= s
->frame_len_bits
;
201 s
->block_len
= 1 << s
->block_len_bits
;
202 // assert((s->block_pos + s->block_len) <= s->frame_len);
203 bsize
= s
->frame_len_bits
- s
->block_len_bits
;
/* v = width of the coded coefficient range for this block size. */
206 v
= s
->coefs_end
[bsize
] - s
->coefs_start
;
207 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++)
210 int n4
= s
->block_len
/ 2;
211 mdct_norm
= 1.0 / (float) n4
;
213 mdct_norm
*= sqrt(n4
);
216 if (s
->avctx
->channels
== 2)
217 put_bits(&s
->pb
, 1, !!s
->ms_stereo
);
219 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
220 // FIXME only set channel_coded when needed, instead of always
221 s
->channel_coded
[ch
] = 1;
222 if (s
->channel_coded
[ch
])
223 init_exp(s
, ch
, fixed_exp
);
/* Quantisation: scale each coefficient by 1 / (exponent * mult) and
 * round to the nearest integer. */
226 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
227 if (s
->channel_coded
[ch
]) {
229 float *coefs
, *exponents
, mult
;
232 coefs1
= s
->coefs1
[ch
];
233 exponents
= s
->exponents
[ch
];
234 mult
= pow(10, total_gain
* 0.05) / s
->max_exponent
[ch
];
236 coefs
= src_coefs
[ch
];
/* The "&& 0" makes this branch unreachable. */
237 if (s
->use_noise_coding
&& 0) {
238 assert(0); // FIXME not implemented
240 coefs
+= s
->coefs_start
;
242 for (i
= 0; i
< n
; i
++) {
243 double t
= *coefs
++ / (exponents
[i
] * mult
);
/* Range check on the quantised value; the statement it guards is not
 * visible in this listing. */
244 if (t
< -32768 || t
> 32767)
247 coefs1
[i
] = lrint(t
);
254 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
255 int a
= s
->channel_coded
[ch
];
256 put_bits(&s
->pb
, 1, a
);
/* total_gain - 1 is emitted as a run of 7-bit values of 127 followed
 * by the remainder. */
263 for (v
= total_gain
- 1; v
>= 127; v
-= 127)
264 put_bits(&s
->pb
, 7, 127);
265 put_bits(&s
->pb
, 7, v
);
267 coef_nb_bits
= ff_wma_total_gain_to_bits(total_gain
);
269 if (s
->use_noise_coding
) {
270 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
271 if (s
->channel_coded
[ch
]) {
273 n
= s
->exponent_high_sizes
[bsize
];
274 for (i
= 0; i
< n
; i
++) {
/* The assignment inside the call clears high_band_coded and writes
 * that 0 as the flag bit. */
275 put_bits(&s
->pb
, 1, s
->high_band_coded
[ch
][i
] = 0);
277 nb_coefs
[ch
] -= s
->exponent_high_bands
[bsize
][i
];
284 if (s
->block_len_bits
!= s
->frame_len_bits
)
285 put_bits(&s
->pb
, 1, parse_exponents
);
287 if (parse_exponents
) {
288 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
289 if (s
->channel_coded
[ch
]) {
290 if (s
->use_exp_vlc
) {
291 encode_exp_vlc(s
, ch
, fixed_exp
);
293 assert(0); // FIXME not implemented
294 // encode_exp_lsp(s, ch);
299 assert(0); // FIXME not implemented
301 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
302 if (s
->channel_coded
[ch
]) {
/* tindex is 1 only for channel 1 when ms_stereo is set, otherwise 0;
 * it selects the VLC and int_table used below. */
305 tindex
= (ch
== 1 && s
->ms_stereo
);
306 ptr
= &s
->coefs1
[ch
][0];
307 eptr
= ptr
+ nb_coefs
[ch
];
310 for (; ptr
< eptr
; ptr
++) {
313 int abs_level
= FFABS(level
);
/* A table code is used only when both the level and the run fit the
 * limits of the selected VLC. */
315 if (abs_level
<= s
->coef_vlcs
[tindex
]->max_level
)
316 if (run
< s
->coef_vlcs
[tindex
]->levels
[abs_level
- 1])
317 code
= run
+ s
->int_table
[tindex
][abs_level
- 1];
319 assert(code
< s
->coef_vlcs
[tindex
]->n
);
320 put_bits(&s
->pb
, s
->coef_vlcs
[tindex
]->huffbits
[code
],
321 s
->coef_vlcs
[tindex
]->huffcodes
[code
]);
/* Explicit level/run fields: abs_level in coef_nb_bits bits, run in
 * frame_len_bits bits. The enclosing condition is not visible in this
 * listing. */
324 if (1 << coef_nb_bits
<= abs_level
)
327 put_bits(&s
->pb
, coef_nb_bits
, abs_level
);
328 put_bits(&s
->pb
, s
->frame_len_bits
, run
);
330 // FIXME the sign is flipped somewhere
331 put_bits(&s
->pb
, 1, level
< 0);
337 put_bits(&s
->pb
, s
->coef_vlcs
[tindex
]->huffbits
[1],
338 s
->coef_vlcs
[tindex
]->huffcodes
[1]);
340 if (s
->version
== 1 && s
->avctx
->channels
>= 2)
341 avpriv_align_put_bits(&s
->pb
);
/*
 * Encodes one frame into buf and reports how its size compares with the
 * configured block size.
 *
 * s:          encoder context; s->pb is (re)initialised over buf.
 * src_coefs:  per-channel coefficient arrays passed on to encode_block().
 * buf:        output buffer.
 * buf_size:   size of buf as given to init_put_bits().
 * total_gain: gain value passed on to encode_block().
 *
 * Returns put_bits_count(&s->pb) / 8 - s->avctx->block_align after
 * aligning the bit writer. The bit-reservoir mode is not implemented
 * (assert(0)).
 *
 * NOTE(review): the statement executed when encode_block() returns a
 * negative value is not visible in this listing.
 */
346 static int encode_frame(WMACodecContext
*s
, float (*src_coefs
)[BLOCK_MAX_SIZE
],
347 uint8_t *buf
, int buf_size
, int total_gain
)
349 init_put_bits(&s
->pb
, buf
, buf_size
);
351 if (s
->use_bit_reservoir
)
352 assert(0); // FIXME not implemented
353 else if (encode_block(s
, src_coefs
, total_gain
) < 0)
356 avpriv_align_put_bits(&s
->pb
);
358 return put_bits_count(&s
->pb
) / 8 - s
->avctx
->block_align
;
/*
 * encode2 callback shared by both codec entries: encodes one input frame
 * into avpkt.
 *
 * avctx:          codec context; avctx->priv_data is the WMACodecContext.
 * avpkt:          output packet, allocated here with ff_alloc_packet().
 * frame:          input audio frame.
 * got_packet_ptr: output flag (not touched by any line visible here).
 *
 * Visible steps:
 *  - forces block_len_bits = frame_len_bits and derives block_len;
 *  - runs apply_window_and_mdct();
 *  - replaces coefs[0][i] / coefs[1][i] by (a + b) / (a - b), where a and
 *    b are the two inputs multiplied by 0.5 (the condition guarding this
 *    loop is not visible in this listing);
 *  - allocates a packet of 2 * MAX_CODED_SUPERFRAME_SIZE bytes;
 *  - calls encode_frame() in a loop where i takes the values 64, 32, ...,
 *    1 (presumably a search for total_gain; the loop body is mostly not
 *    visible, confirm against the full file);
 *  - calls encode_frame() once more with the chosen total_gain and
 *    returns AVERROR(EINVAL) when its result is >= 0;
 *  - asserts the bit writer is byte-aligned, writes 'N' bytes and
 *    flushes the writer;
 *  - sets avpkt->pts to frame->pts minus the time-base equivalent of
 *    avctx->initial_padding when frame->pts is set;
 *  - sets avpkt->size to avctx->block_align.
 */
361 static int encode_superframe(AVCodecContext
*avctx
, AVPacket
*avpkt
,
362 const AVFrame
*frame
, int *got_packet_ptr
)
364 WMACodecContext
*s
= avctx
->priv_data
;
365 int i
, total_gain
, ret
;
367 s
->block_len_bits
= s
->frame_len_bits
; // required by non variable block len
368 s
->block_len
= 1 << s
->block_len_bits
;
370 apply_window_and_mdct(avctx
, frame
);
/* Sum/difference transform of the two channels, each input halved. */
376 for (i
= 0; i
< s
->block_len
; i
++) {
377 a
= s
->coefs
[0][i
] * 0.5;
378 b
= s
->coefs
[1][i
] * 0.5;
379 s
->coefs
[0][i
] = a
+ b
;
380 s
->coefs
[1][i
] = a
- b
;
384 if ((ret
= ff_alloc_packet(avpkt
, 2 * MAX_CODED_SUPERFRAME_SIZE
))) {
385 av_log(avctx
, AV_LOG_ERROR
, "Error getting output packet\n");
/* i is halved on every iteration: 64, 32, 16, 8, 4, 2, 1. */
390 for (i
= 64; i
; i
>>= 1) {
391 int error
= encode_frame(s
, s
->coefs
, avpkt
->data
, avpkt
->size
,
/* encode_frame() returns the encoded size minus block_align, so a
 * result >= 0 is treated as "frame does not fit" here. */
397 if ((i
= encode_frame(s
, s
->coefs
, avpkt
->data
, avpkt
->size
, total_gain
)) >= 0) {
398 av_log(avctx
, AV_LOG_ERROR
, "required frame size too large. please "
399 "use a higher bit rate.\n");
400 return AVERROR(EINVAL
);
402 assert((put_bits_count(&s
->pb
) & 7) == 0);
/* Writes the byte 'N'; the loop that repeats this call is not visible
 * in this listing. */
404 put_bits(&s
->pb
, 8, 'N');
406 flush_put_bits(&s
->pb
);
408 if (frame
->pts
!= AV_NOPTS_VALUE
)
409 avpkt
->pts
= frame
->pts
- ff_samples_to_time_base(avctx
, avctx
->initial_padding
);
/* The packet size is always the fixed block size. */
411 avpkt
->size
= avctx
->block_align
;
/*
 * Codec entry for the "Windows Media Audio 1" encoder: audio type,
 * id AV_CODEC_ID_WMAV1, private context WMACodecContext, encode2 callback
 * encode_superframe, and a sample format list of AV_SAMPLE_FMT_FLTP
 * terminated by AV_SAMPLE_FMT_NONE.
 *
 * NOTE(review): some initializer fields and the closing brace are missing
 * from this listing.
 */
416 AVCodec ff_wmav1_encoder
= {
418 .long_name
= NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
419 .type
= AVMEDIA_TYPE_AUDIO
,
420 .id
= AV_CODEC_ID_WMAV1
,
421 .priv_data_size
= sizeof(WMACodecContext
),
423 .encode2
= encode_superframe
,
425 .sample_fmts
= (const enum AVSampleFormat
[]) { AV_SAMPLE_FMT_FLTP
,
426 AV_SAMPLE_FMT_NONE
},
/*
 * Codec entry for the "Windows Media Audio 2" encoder: audio type,
 * id AV_CODEC_ID_WMAV2, private context WMACodecContext, encode2 callback
 * encode_superframe, and a sample format list of AV_SAMPLE_FMT_FLTP
 * terminated by AV_SAMPLE_FMT_NONE.
 *
 * NOTE(review): some initializer fields and the closing brace are missing
 * from this listing.
 */
429 AVCodec ff_wmav2_encoder
= {
431 .long_name
= NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
432 .type
= AVMEDIA_TYPE_AUDIO
,
433 .id
= AV_CODEC_ID_WMAV2
,
434 .priv_data_size
= sizeof(WMACodecContext
),
436 .encode2
= encode_superframe
,
438 .sample_fmts
= (const enum AVSampleFormat
[]) { AV_SAMPLE_FMT_FLTP
,
439 AV_SAMPLE_FMT_NONE
},