00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
/* NOTE(review): defined ahead of the includes, presumably so that get_bits.h
 * is compiled without per-read bounds checking -- confirm against get_bits.h. */
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "internal.h"
00033 #include "get_bits.h"
00034 #include "put_bits.h"
00035 #include "wmavoice_data.h"
00036 #include "celp_math.h"
00037 #include "celp_filters.h"
00038 #include "acelp_vectors.h"
00039 #include "acelp_filters.h"
00040 #include "lsp.h"
00041 #include "libavutil/lzo.h"
00042 #include "dct.h"
00043 #include "rdft.h"
00044 #include "sinewin.h"
00045
00046 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
00047 #define MAX_LSPS 16 ///< maximum filter order
00048 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be a multiple
00049 ///< of 16 (presumably for buffer alignment -- TODO confirm)
00050 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
00051 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
00052 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history
00053 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES) ///< maximum number of samples per superframe
00054
00055 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum size, in bytes, of
00056 ///< WMAVoiceContext.sframe_cache (excluding padding)
00057 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
00058
/** Frame type VLC; built by decode_vbmtree(). */
00062 static VLC frame_type_vlc;
00063
/**
 * Values of frame_type_desc.acb_type (ACB presumably stands for
 * "adaptive codebook" -- TODO confirm).
 */
00067 enum {
00068 ACB_TYPE_NONE = 0, ///< synth_block() uses synth_block_hardcoded() for these frames
00069 ACB_TYPE_ASYMMETRIC = 1, ///< synth_block_fcb_acb() re-derives the pitch
00070 ///< along the block from last_pitch_val and
00071 ///< pitch_diff_sh16, interpolating with
00072 ///< wmavoice_ipol1_coeffs
00073
00074 ACB_TYPE_HAMMING = 2 ///< synth_block_fcb_acb() uses one pitch per block:
00075 ///< interpolation with wmavoice_ipol2_coeffs, or a
00076 ///< plain back-copy when the pitch has no fractional part
00077 };
00078
/**
 * Values of frame_type_desc.fcb_type (FCB presumably stands for
 * "fixed codebook" -- TODO confirm).
 */
00082 enum {
00083 FCB_TYPE_SILENCE = 0, ///< excitation comes from wmavoice_std_codebook
00084 ///< at a pRNG()-chosen offset, scaled by
00085 ///< silence_gain; wiener_denoise() skips filtering
00086 FCB_TYPE_HARDCODED = 1, ///< excitation comes from wmavoice_std_codebook;
00087 ///< offset and gain index are read from the bitstream
00088 FCB_TYPE_AW_PULSES = 2, ///< pulses are decoded by aw_pulse_set1()
00089 ///< and aw_pulse_set2()
00090 FCB_TYPE_EXC_PULSES = 3, ///< five pulse tracks are read directly
00091 ///< from the bitstream, optionally with a
00092 ///< second pulse per track (dbl_pulses)
00093 };
00094
/**
 * Static description of each of the 17 frame types; the entries are
 * { n_blocks, log_n_blocks, acb_type, fcb_type, dbl_pulses, frame_size }.
 */
00098 static const struct frame_type_desc {
00099 uint8_t n_blocks; ///< number of blocks per frame (divisor of the
00100 ///< LSP interpolation factor in synth_block())
00101 uint8_t log_n_blocks; ///< log2(n_blocks)
00102 uint8_t acb_type; ///< one of the ACB_TYPE_* values
00103 uint8_t fcb_type; ///< one of the FCB_TYPE_* values
00104 uint8_t dbl_pulses; ///< number of the five pulse tracks that carry
00105 ///< a second pulse (FCB_TYPE_EXC_PULSES only,
00106 ///< see synth_block_fcb_acb())
00107 uint16_t frame_size; ///< NOTE(review): presumably the number of bits
00108 ///< of block data per frame -- confirm against the frame parser
00109 } frame_descs[17] = {
00110 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00111 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00112 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00113 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00114 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00115 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00116 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00117 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00118 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00119 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00120 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00121 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00122 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00123 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00124 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00125 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00126 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00127 };
00128
/**
 * WMA Voice decoder state. The stream-constant fields are filled in by
 * wmavoice_decode_init(); the rest is carried over between blocks/frames.
 */
00132 typedef struct {
00137 AVFrame frame; ///< exposed as ctx->coded_frame by wmavoice_decode_init()
00138 GetBitContext gb; ///< bit reader; wmavoice_decode_init() uses it to
00139 ///< parse the VBM tree from the extradata
00140
00141
00142 int8_t vbm_tree[25]; ///< filled by decode_vbmtree(): slot
00143 ///< [group * 3 + i] holds a frame type (0-16), unused slots are 0xff
00144 int spillover_bitsize; ///< 3 + ceil(log2(ctx->block_align))
00145
00146
00147 int history_nsamples; ///< max_pitch_val + 8; must not exceed
00148 ///< #MAX_SIGNAL_HISTORY
00149
00150
00151 int do_apf; ///< bit 0 of the extradata flags; when set, the
00152 ///< RDFT/DCT contexts and sin/cos tables below are initialised
00153 int denoise_strength; ///< bits 2-5 of the flags (must be < 12); row
00154 ///< index into wmavoice_denoise_power_table
00155 int denoise_tilt_corr; ///< flag bit 0x40; makes calc_input_response()
00156 ///< tilt-compensate the denoise coefficients
00157 int dc_level; ///< bits 7-10 of the flags; postfilter() applies
00158 ///< its 2nd-order transfer function only when this is > 8
00159
00160 int lsps; ///< number of LSPs: 16 if flag bit 0x1000 is set, else 10
00161 int lsp_q_mode; ///< flag bit 0x2000
00162 int lsp_def_mode; ///< flag bit 0x4000
00163
00164 int frame_lsp_bitsize; ///< 34 when lsps == 16, else 24
00165
00166 int sframe_lsp_bitsize; ///< 60 when lsps == 16, else 48
00167
00168
00169 int min_pitch_val; ///< smallest pitch value, derived from the sample rate
00170 int max_pitch_val; ///< largest pitch value, derived from the sample rate
00171 int pitch_nbits; ///< ceil(log2(max_pitch_val - min_pitch_val))
00172
00173 int block_pitch_nbits; ///< ceil(log2(block_pitch_range))
00174
00175 int block_pitch_range; ///< derived from block_conv_table[] and min_pitch_val
00176 int block_delta_pitch_nbits; ///< 1 + ceil(log2(block_delta_pitch_hrange))
00177
00178
00179
00180 int block_delta_pitch_hrange; ///< (pitch_range >> 3) & ~0xF; must be > 0
00181
00182 uint16_t block_conv_table[4]; ///< { min_pitch_val, pitch_range * 25 >> 6,
00183 ///< pitch_range * 44 >> 6, max_pitch_val - 1 }
00184
00194 int spillover_nbits;
00195
00196
00197
00198 int has_residual_lsps;
00199
00200
00201
00202
00203 int skip_bits_next;
00204
00205
00206
00207 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< byte cache of up to #SFRAME_CACHE_MAXSIZE bytes plus input padding
00210 int sframe_cache_size; ///< NOTE(review): presumably the fill level of sframe_cache -- confirm unit (bits or bytes)
00211
00212
00213
00214
00215 PutBitContext pb;
00216
00226 double prev_lsps[MAX_LSPS]; ///< initialised to M_PI * (n + 1) / (lsps + 1)
00227
00228 int last_pitch_val; ///< starts at 40; base of the per-sample pitch in synth_block_fcb_acb()
00229 int last_acb_type; ///< starts as ACB_TYPE_NONE
00230 int pitch_diff_sh16; ///< per-sample pitch change, scaled by 1 << 16
00231
00232 float silence_gain; ///< gain for FCB_TYPE_SILENCE blocks and for the aw_pulse_set2() failure fallback
00233
00234 int aw_idx_is_ext; ///< set by aw_parse_coords() when the 6-bit index was >= 54 and 2 more bits were read
00235
00236 int aw_pulse_range; ///< 24 if both block pitches are > 32, else 16
00237
00238
00239
00240
00241
00242 int aw_n_pulses[2]; ///< pulse count for each of the two blocks
00243
00244
00245 int aw_first_pulse_off[2]; ///< offset of the first pulse for each of the two blocks
00246
00247 int aw_next_pulse_off_cache; ///< written at the end of aw_pulse_set2(), read back for block 1
00248
00249
00250
00251
00252
00253 int frame_cntr; ///< fed to pRNG() as its frame counter input
00254
00255 float gain_pred_err[6]; ///< recent gain prediction errors, newest first; cleared by synth_block_hardcoded()
00256 float excitation_history[MAX_SIGNAL_HISTORY];
00260 float synth_history[MAX_LSPS];
00261
00270 RDFTContext rdft, irdft; ///< 7-bit forward (DFT_R2C) and inverse (IDFT_C2R) transforms
00271
00272 DCTContext dct, dst; ///< 6-bit DCT_I and DST_I transforms
00273
00274 float sin[511], cos[511]; ///< tables built from ff_sine_window_init(); indexed with 255 + clipped value in calc_input_response()
00275
00276 float postfilter_agc; ///< gain memory handed to adaptive_gain_control()
00277
00278 float dcf_mem[2]; ///< state of the 2nd-order filter applied in postfilter()
00279 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00282 float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter tail of earlier wiener_denoise() calls, added to the start of the next block
00283 int denoise_filter_cache_size; ///< number of valid samples in denoise_filter_cache
00284 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< 128-float scratch: tilt-compensated LPCs in wiener_denoise()
00286 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< 128-float scratch: denoise filter coefficients
00288 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() output; the first #MAX_LSPS_ALIGN16 floats hold filter history
00291
00294 } WMAVoiceContext;
00295
00305 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00306 {
00307 static const uint8_t bits[] = {
00308 2, 2, 2, 4, 4, 4,
00309 6, 6, 6, 8, 8, 8,
00310 10, 10, 10, 12, 12, 12,
00311 14, 14, 14, 14
00312 };
00313 static const uint16_t codes[] = {
00314 0x0000, 0x0001, 0x0002,
00315 0x000c, 0x000d, 0x000e,
00316 0x003c, 0x003d, 0x003e,
00317 0x00fc, 0x00fd, 0x00fe,
00318 0x03fc, 0x03fd, 0x03fe,
00319 0x0ffc, 0x0ffd, 0x0ffe,
00320 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00321 };
00322 int cntr[8], n, res;
00323
00324 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00325 memset(cntr, 0, sizeof(cntr));
00326 for (n = 0; n < 17; n++) {
00327 res = get_bits(gb, 3);
00328 if (cntr[res] > 3)
00329 return -1;
00330 vbm_tree[res * 3 + cntr[res]++] = n;
00331 }
00332 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00333 bits, 1, 1, codes, 2, 2, 132);
00334 return 0;
00335 }
00336
00340 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00341 {
00342 int n, flags, pitch_range, lsp16_flag;
00343 WMAVoiceContext *s = ctx->priv_data;
00344
00353 if (ctx->extradata_size != 46) {
00354 av_log(ctx, AV_LOG_ERROR,
00355 "Invalid extradata size %d (should be 46)\n",
00356 ctx->extradata_size);
00357 return -1;
00358 }
00359 flags = AV_RL32(ctx->extradata + 18);
00360 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00361 s->do_apf = flags & 0x1;
00362 if (s->do_apf) {
00363 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00364 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00365 ff_dct_init(&s->dct, 6, DCT_I);
00366 ff_dct_init(&s->dst, 6, DST_I);
00367
00368 ff_sine_window_init(s->cos, 256);
00369 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00370 for (n = 0; n < 255; n++) {
00371 s->sin[n] = -s->sin[510 - n];
00372 s->cos[510 - n] = s->cos[n];
00373 }
00374 }
00375 s->denoise_strength = (flags >> 2) & 0xF;
00376 if (s->denoise_strength >= 12) {
00377 av_log(ctx, AV_LOG_ERROR,
00378 "Invalid denoise filter strength %d (max=11)\n",
00379 s->denoise_strength);
00380 return -1;
00381 }
00382 s->denoise_tilt_corr = !!(flags & 0x40);
00383 s->dc_level = (flags >> 7) & 0xF;
00384 s->lsp_q_mode = !!(flags & 0x2000);
00385 s->lsp_def_mode = !!(flags & 0x4000);
00386 lsp16_flag = flags & 0x1000;
00387 if (lsp16_flag) {
00388 s->lsps = 16;
00389 s->frame_lsp_bitsize = 34;
00390 s->sframe_lsp_bitsize = 60;
00391 } else {
00392 s->lsps = 10;
00393 s->frame_lsp_bitsize = 24;
00394 s->sframe_lsp_bitsize = 48;
00395 }
00396 for (n = 0; n < s->lsps; n++)
00397 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00398
00399 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00400 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00401 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00402 return -1;
00403 }
00404
00405 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00406 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00407 pitch_range = s->max_pitch_val - s->min_pitch_val;
00408 if (pitch_range <= 0) {
00409 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00410 return -1;
00411 }
00412 s->pitch_nbits = av_ceil_log2(pitch_range);
00413 s->last_pitch_val = 40;
00414 s->last_acb_type = ACB_TYPE_NONE;
00415 s->history_nsamples = s->max_pitch_val + 8;
00416
00417 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00418 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00419 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00420
00421 av_log(ctx, AV_LOG_ERROR,
00422 "Unsupported samplerate %d (min=%d, max=%d)\n",
00423 ctx->sample_rate, min_sr, max_sr);
00424
00425 return -1;
00426 }
00427
00428 s->block_conv_table[0] = s->min_pitch_val;
00429 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00430 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00431 s->block_conv_table[3] = s->max_pitch_val - 1;
00432 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00433 if (s->block_delta_pitch_hrange <= 0) {
00434 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00435 return -1;
00436 }
00437 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00438 s->block_pitch_range = s->block_conv_table[2] +
00439 s->block_conv_table[3] + 1 +
00440 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00441 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00442
00443 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00444
00445 avcodec_get_frame_defaults(&s->frame);
00446 ctx->coded_frame = &s->frame;
00447
00448 return 0;
00449 }
00450
/**
 * Adaptive gain control: scale a filtered signal so that its level follows
 * that of a reference signal, smoothing the gain from sample to sample.
 *
 * Both "energies" are sums of absolute sample values. The per-sample gain is
 * a one-pole recursion: mem = alpha * mem + (1 - alpha) * ref / filtered.
 *
 * @param out          output buffer of size samples (may be the same as in)
 * @param in           signal to be scaled
 * @param speech_synth reference signal that sets the target level
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     gain state, read on entry and updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    /* An all-zero input block would otherwise divide by zero and leave
     * inf/NaN in the gain memory, corrupting every later block. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00493
00512 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00513 const float *in, float *out, int size)
00514 {
00515 int n;
00516 float optimal_gain = 0, dot;
00517 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00518 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00519 *best_hist_ptr;
00520
00521
00522 do {
00523 dot = ff_dot_productf(in, ptr, size);
00524 if (dot > optimal_gain) {
00525 optimal_gain = dot;
00526 best_hist_ptr = ptr;
00527 }
00528 } while (--ptr >= end);
00529
00530 if (optimal_gain <= 0)
00531 return -1;
00532 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00533 if (dot <= 0)
00534 return -1;
00535
00536 if (optimal_gain <= dot) {
00537 dot = dot / (dot + 0.6 * optimal_gain);
00538 } else
00539 dot = 0.625;
00540
00541
00542 for (n = 0; n < size; n++)
00543 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00544
00545 return 0;
00546 }
00547
/**
 * Compute a tilt factor for a set of LPCs: the lag-1 correlation of the
 * sequence { 1, lpcs[0], ..., lpcs[n_lpcs - 1] } divided by its energy
 * (assuming ff_dot_productf() returns the plain dot product).
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return the correlation-to-energy ratio
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float energy = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    const float corr   = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return corr / energy;
}
00567
/**
 * Derive the denoise filter taps from a buffer of (tilt-compensated) LPCs.
 *
 * @param s         decoder context (transforms, sin/cos tables, denoise settings)
 * @param lpcs      128-float work buffer holding the LPCs on entry; overwritten
 * @param fcb_type  FCB_TYPE_* of the frame; only selects the gain multiplier
 * @param coeffs    128-float output buffer; the first remainder entries hold
 *                  the filter taps, the rest is zeroed
 * @param remainder number of filter taps to keep
 */
00571 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00572 int fcb_type, float *coeffs, int remainder)
00573 {
00574 float last_coeff, min = 15.0, max = -15.0;
00575 float irange, angle_mul, gain_mul, range, sq;
00576 int n, idx;
00577
00578 /* forward RDFT of the LPC buffer, in place */
00579 s->rdft.rdft_calc(&s->rdft, lpcs);
/* log_range(): store log10 of a power value in var and update min/max */
00580 #define log_range(var, assign) do { \
00581 float tmp = log10f(assign); var = tmp; \
00582 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00583 } while (0)
/* lpcs[1] and lpcs[0] are treated as real-only values, entries 2n / 2n + 1
 * as (re, im) pairs -- presumably the packed RDFT output layout; TODO confirm.
 * lpcs[0] is written last because log powers are stored in lpcs[0..63]. */
00584 log_range(last_coeff, lpcs[1] * lpcs[1]);
00585 for (n = 1; n < 64; n++)
00586 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00587 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00588 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00589 #undef log_range
00590 range = max - min;
00591 lpcs[64] = last_coeff;
00592
00593 /* For each of the 65 log powers: map its distance below max onto a
00594  * column of wmavoice_denoise_power_table (row = denoise_strength), then
00595  * store angle_mul * pwr back into lpcs[] and an energy-table value into
00596  * coeffs[]. */
00597
00598 irange = 64.0 / range;
00599 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00600 (5.0 / 14.7));
00601 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00602 for (n = 0; n <= 64; n++) {
00603 float pwr;
00604
00605 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00606 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00607 lpcs[n] = angle_mul * pwr;
00608
00609 /* energy table lookup; indices above 127 are extrapolated with powf() */
00610 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00611 if (idx > 127) {
00612 coeffs[n] = wmavoice_energy_table[127] *
00613 powf(1.0331663, idx - 127);
00614 } else
00615 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00616 }
00617
00618 /* run the DCT-I and then the DST-I over lpcs[], both in place
00619  * NOTE(review): the combined effect of the two transforms is not
00620  * evident from this file -- confirm before changing. */
00621
00622 s->dct.dct_calc(&s->dct, lpcs);
00623 s->dst.dct_calc(&s->dst, lpcs);
00624
00625 /* turn each magnitude in coeffs[] into a (cos, sin)-weighted pair; table indices are clipped to [0, 510] */
00626 idx = 255 + av_clip(lpcs[64], -255, 255);
00627 coeffs[0] = coeffs[0] * s->cos[idx];
00628 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00629 last_coeff = coeffs[64] * s->cos[idx];
/* walks n from 63 down to 1, two entries per iteration with alternating
 * sign of lpcs[64]; iterating downwards means coeffs[n] is read before
 * the pair writes can reach it */
00630 for (n = 63;; n--) {
00631 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00632 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00633 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00634
00635 if (!--n) break;
00636
00637 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00638 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00639 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00640 }
00641 coeffs[1] = last_coeff;
00642
00643 /* inverse RDFT of the coefficient pairs, in place */
00644 s->irdft.rdft_calc(&s->irdft, coeffs);
00645
00646 /* keep only the first remainder taps of the 128-entry buffer */
00647 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00648 if (s->denoise_tilt_corr) {
00649 float tilt_mem = 0;
00650
00651 coeffs[remainder - 1] = 0;
00652 ff_tilt_compensation(&tilt_mem,
00653 -1.8 * tilt_factor(coeffs, remainder - 1),
00654 coeffs, remainder);
00655 }
/* normalise the kept taps so that their L2 norm becomes 1/64 */
00656 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00657 for (n = 0; n < remainder; n++)
00658 coeffs[n] *= sq;
00659 }
00660
/**
 * Filter one block of synthesised speech with a denoise filter derived from
 * the block's LPCs, using 128-point transforms and carrying the filter tail
 * over to the following block(s) through denoise_filter_cache.
 *
 * For FCB_TYPE_SILENCE blocks no filtering is done; only the tail cached by
 * earlier blocks is added.
 *
 * @param s        decoder context
 * @param fcb_type FCB_TYPE_* of the current frame
 * @param synth_pf samples to filter in place; must have room for 128 floats
 * @param size     number of valid samples in synth_pf
 * @param lpcs     LPCs of this block (s->lsps entries are read)
 */
00687 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00688 float *synth_pf, int size,
00689 const float *lpcs)
00690 {
00691 int remainder, lim, n;
00692
00693 if (fcb_type != FCB_TYPE_SILENCE) {
00694 float *tilted_lpcs = s->tilted_lpcs_pf,
00695 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00696
/* build { 1, lpcs..., 0, ... } in the 128-float scratch buffer and tilt-compensate it */
00697 tilted_lpcs[0] = 1.0;
00698 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00699 memset(&tilted_lpcs[s->lsps + 1], 0,
00700 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00701 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00702 tilted_lpcs, s->lsps + 2);
00703
00704 /* remainder is the number of filter taps kept; it is chosen so that
00705  * size + remainder never exceeds 127, i.e. the filtered block plus
00706  * its tail fits in the 128-point transform */
00707
00708 remainder = FFMIN(127 - size, size - 1);
00709 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00710
00711 /* zero-pad the block, transform block and taps, multiply the spectra
00712  * (entries 0 and 1 as plain reals, the rest as complex pairs), transform back */
00713 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00714 s->rdft.rdft_calc(&s->rdft, synth_pf);
00715 s->rdft.rdft_calc(&s->rdft, coeffs);
00716 synth_pf[0] *= coeffs[0];
00717 synth_pf[1] *= coeffs[1];
00718 for (n = 1; n < 64; n++) {
00719 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00720 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00721 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00722 }
00723 s->irdft.rdft_calc(&s->irdft, synth_pf);
00724 }
00725
00726 /* add the tail cached by earlier blocks to the start of this block, then drop what was consumed */
00727 if (s->denoise_filter_cache_size) {
00728 lim = FFMIN(s->denoise_filter_cache_size, size);
00729 for (n = 0; n < lim; n++)
00730 synth_pf[n] += s->denoise_filter_cache[n];
00731 s->denoise_filter_cache_size -= lim;
00732 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00733 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00734 }
00735
00736 /* cache this block's tail (synth_pf[size .. size + remainder)): sum it into what is left of the cache, append the rest */
00737 if (fcb_type != FCB_TYPE_SILENCE) {
00738 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00739 for (n = 0; n < lim; n++)
00740 s->denoise_filter_cache[n] += synth_pf[size + n];
00741 if (lim < remainder) {
00742 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00743 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00744 s->denoise_filter_cache_size = remainder;
00745 }
00746 }
00747 }
00748
00769 static void postfilter(WMAVoiceContext *s, const float *synth,
00770 float *samples, int size,
00771 const float *lpcs, float *zero_exc_pf,
00772 int fcb_type, int pitch)
00773 {
00774 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00775 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00776 *synth_filter_in = zero_exc_pf;
00777
00778 assert(size <= MAX_FRAMESIZE / 2);
00779
00780
00781 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00782
00783 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00784 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00785 synth_filter_in = synth_filter_in_buf;
00786
00787
00788 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00789 synth_filter_in, size, s->lsps);
00790 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00791 sizeof(synth_pf[0]) * s->lsps);
00792
00793 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00794
00795 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00796 &s->postfilter_agc);
00797
00798 if (s->dc_level > 8) {
00799
00800
00801
00802 ff_acelp_apply_order_2_transfer_function(samples, samples,
00803 (const float[2]) { -1.99997, 1.0 },
00804 (const float[2]) { -1.9330735188, 0.93589198496 },
00805 0.93980580475, s->dcf_mem, size);
00806 }
00807 }
/**
 * Dequantise LSPs from a multi-stage codebook.
 *
 * The stage codebooks are stored back to back in table, each holding
 * sizes[stage] vectors of num bytes. For every stage the vector selected by
 * values[stage] is scaled, offset and accumulated into lsps.
 *
 * @param lsps     output array of num values (overwritten)
 * @param num      number of values per codebook vector
 * @param values   selected vector index for each stage
 * @param sizes    number of vectors in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated stage codebooks
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *stage_table = table;
    int stage, i;

    for (i = 0; i < num; i++)
        lsps[i] = 0.0;

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *entry = &stage_table[values[stage] * num];
        const double offset  = base_q[stage];
        const double scale   = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * entry[i];

        /* step over this stage's codebook */
        stage_table += sizes[stage] * num;
    }
}
00843
00855 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00856 {
00857 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00858 static const double mul_lsf[4] = {
00859 5.2187144800e-3, 1.4626986422e-3,
00860 9.6179549166e-4, 1.1325736225e-3
00861 };
00862 static const double base_lsf[4] = {
00863 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00864 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00865 };
00866 uint16_t v[4];
00867
00868 v[0] = get_bits(gb, 8);
00869 v[1] = get_bits(gb, 6);
00870 v[2] = get_bits(gb, 5);
00871 v[3] = get_bits(gb, 5);
00872
00873 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00874 mul_lsf, base_lsf);
00875 }
00876
00881 static void dequant_lsp10r(GetBitContext *gb,
00882 double *i_lsps, const double *old,
00883 double *a1, double *a2, int q_mode)
00884 {
00885 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00886 static const double mul_lsf[3] = {
00887 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00888 };
00889 static const double base_lsf[3] = {
00890 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00891 };
00892 const float (*ipol_tab)[2][10] = q_mode ?
00893 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00894 uint16_t interpol, v[3];
00895 int n;
00896
00897 dequant_lsp10i(gb, i_lsps);
00898
00899 interpol = get_bits(gb, 5);
00900 v[0] = get_bits(gb, 7);
00901 v[1] = get_bits(gb, 6);
00902 v[2] = get_bits(gb, 6);
00903
00904 for (n = 0; n < 10; n++) {
00905 double delta = old[n] - i_lsps[n];
00906 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00907 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00908 }
00909
00910 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00911 mul_lsf, base_lsf);
00912 }
00913
00917 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00918 {
00919 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00920 static const double mul_lsf[5] = {
00921 3.3439586280e-3, 6.9908173703e-4,
00922 3.3216608306e-3, 1.0334960326e-3,
00923 3.1899104283e-3
00924 };
00925 static const double base_lsf[5] = {
00926 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00927 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00928 M_PI * -1.29816e-1
00929 };
00930 uint16_t v[5];
00931
00932 v[0] = get_bits(gb, 8);
00933 v[1] = get_bits(gb, 6);
00934 v[2] = get_bits(gb, 7);
00935 v[3] = get_bits(gb, 6);
00936 v[4] = get_bits(gb, 7);
00937
00938 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00939 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00940 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00941 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00942 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00943 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00944 }
00945
00950 static void dequant_lsp16r(GetBitContext *gb,
00951 double *i_lsps, const double *old,
00952 double *a1, double *a2, int q_mode)
00953 {
00954 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00955 static const double mul_lsf[3] = {
00956 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00957 };
00958 static const double base_lsf[3] = {
00959 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00960 };
00961 const float (*ipol_tab)[2][16] = q_mode ?
00962 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00963 uint16_t interpol, v[3];
00964 int n;
00965
00966 dequant_lsp16i(gb, i_lsps);
00967
00968 interpol = get_bits(gb, 5);
00969 v[0] = get_bits(gb, 7);
00970 v[1] = get_bits(gb, 7);
00971 v[2] = get_bits(gb, 7);
00972
00973 for (n = 0; n < 16; n++) {
00974 double delta = old[n] - i_lsps[n];
00975 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00976 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00977 }
00978
00979 dequant_lsps( a2, 10, v, vec_sizes, 1,
00980 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00981 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00982 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00983 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00984 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00985 }
00986
/**
 * Parse the pulse start index of an FCB_TYPE_AW_PULSES frame and derive, for
 * each of the two blocks, the number of pulses and the offset of the first
 * pulse. Results are stored in s->aw_idx_is_ext, s->aw_pulse_range,
 * s->aw_n_pulses[] and s->aw_first_pulse_off[].
 *
 * @param s     decoder context
 * @param gb    bit reader
 * @param pitch pitch values of the two blocks (pitch[0], pitch[1])
 */
01000 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01001 const int *pitch)
01002 {
/* start offset for each of the 94 possible index values */
01003 static const int16_t start_offset[94] = {
01004 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
01005 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
01006 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
01007 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
01008 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
01009 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
01010 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01011 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01012 };
01013 int bits, offset;
01014
01015 /* 6-bit index; values >= 54 are extended with 2 more bits, giving 54..93 */
01016 s->aw_idx_is_ext = 0;
01017 if ((bits = get_bits(gb, 6)) >= 54) {
01018 s->aw_idx_is_ext = 1;
01019 bits += (bits - 54) * 3 + get_bits(gb, 2);
01020 }
01021
01022 /* pulse range depends on the smaller pitch; then, starting from the first
01023  * non-negative position, count the pulses that fall into each block */
01024 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01025 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01026 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01027 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01028 offset += s->aw_n_pulses[0] * pitch[0];
01029 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01030 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01031
01032 /* if the start offset lies before the middle of the frame, step the first
01033  * pulse offsets back by whole pitch periods for as long as a pulse range
01034  * one period earlier would still reach past position 0 */
01035 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01036 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01037 s->aw_first_pulse_off[1] -= pitch[1];
01038 if (start_offset[bits] < 0)
01039 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01040 s->aw_first_pulse_off[0] -= pitch[0];
01041 }
01042 }
01043
/**
 * Decode the single pulse of the second AW pulse set and append it to fcb.
 *
 * A bit mask of the MAX_FRAMESIZE / 2 = 80 block positions is built in which
 * the ranges around the periodic pulses are marked unavailable; the pulse is
 * placed on the (aidx + 1)-th available position found when scanning from
 * pulse_start.
 *
 * @param s         decoder context
 * @param gb        bit reader
 * @param block_idx block index within the frame (0 or 1)
 * @param fcb       pulse list to append to; fcb->pitch_lag must be set
 * @return 0 on success, -1 if no available position is left
 */
01052 static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01053 int block_idx, AMRFixed *fcb)
01054 {
01055 uint16_t use_mask_mem[9];
01056 uint16_t *use_mask = use_mask_mem + 2;
01057
01058 /* use_mask[0..4] holds one bit per block position (80 bits), MSB first
01059  * within each word; a set bit means the position is still available.
01060  * The two words in front of and behind it are padding, so that the
01061  * exclusion loop below may touch neighbouring words safely. */
01062
01063
01064 int pulse_off = s->aw_first_pulse_off[block_idx],
01065 pulse_start, n, idx, range, aidx, start_off = 0;
01066
01067 /* move the first pulse forward by pitch periods until its range reaches position 1 or later */
01068 if (s->aw_n_pulses[block_idx] > 0)
01069 while (pulse_off + s->aw_pulse_range < 1)
01070 pulse_off += fcb->pitch_lag;
01071
01072 /* search range: 32 for block 0, 8 for block 1 (which restarts from the cached offset), 16 if block 0 has no pulses */
01073 if (s->aw_n_pulses[0] > 0) {
01074 if (block_idx == 0) {
01075 range = 32;
01076 } else {
01077 range = 8;
01078 if (s->aw_n_pulses[block_idx] > 0)
01079 pulse_off = s->aw_next_pulse_off_cache;
01080 }
01081 } else
01082 range = 16;
01083 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01084
01085 /* mark all 80 positions available, padding words unavailable; then clear
01086  * aw_pulse_range bits starting at every periodic pulse position */
01087
01088 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01089 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01090 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01091 if (s->aw_n_pulses[block_idx] > 0)
01092 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01093 int excl_range = s->aw_pulse_range;
01094 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01095 int first_sh = 16 - (idx & 15);
01096 *use_mask_ptr++ &= 0xFFFFu << first_sh;
01097 excl_range -= first_sh;
01098 if (excl_range >= 16) {
01099 *use_mask_ptr++ = 0;
01100 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01101 } else
01102 *use_mask_ptr &= 0xFFFF >> excl_range;
01103 }
01104
01105 /* read the pulse index, then scan from pulse_start for the (aidx + 1)-th available position */
01106 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01107 for (n = 0; n <= aidx; pulse_start++) {
/* negative positions are wrapped forward by whole pitch periods */
01108 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
/* past the end of the block: take the first position still available anywhere, or fail if there is none */
01109 if (idx >= MAX_FRAMESIZE / 2) {
01110 if (use_mask[0]) idx = 0x0F;
01111 else if (use_mask[1]) idx = 0x1F;
01112 else if (use_mask[2]) idx = 0x2F;
01113 else if (use_mask[3]) idx = 0x3F;
01114 else if (use_mask[4]) idx = 0x4F;
01115 else return -1;
01116 idx -= av_log2_16bit(use_mask[idx >> 4]);
01117 }
01118 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01119 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01120 n++;
01121 start_off = idx;
01122 }
01123 }
01124
/* store the pulse; its sign is one more bit from the stream */
01125 fcb->x[fcb->n] = start_off;
01126 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01127 fcb->n++;
01128
01129 /* remember where the next pitch-periodic repetition of this pulse falls in the following block */
01130 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01131 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01132 return 0;
01133 }
01134
01142 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01143 int block_idx, AMRFixed *fcb)
01144 {
01145 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01146 float v;
01147
01148 if (s->aw_n_pulses[block_idx] > 0) {
01149 int n, v_mask, i_mask, sh, n_pulses;
01150
01151 if (s->aw_pulse_range == 24) {
01152 n_pulses = 3;
01153 v_mask = 8;
01154 i_mask = 7;
01155 sh = 4;
01156 } else {
01157 n_pulses = 4;
01158 v_mask = 4;
01159 i_mask = 3;
01160 sh = 3;
01161 }
01162
01163 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01164 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01165 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01166 s->aw_first_pulse_off[block_idx];
01167 while (fcb->x[fcb->n] < 0)
01168 fcb->x[fcb->n] += fcb->pitch_lag;
01169 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01170 fcb->n++;
01171 }
01172 } else {
01173 int num2 = (val & 0x1FF) >> 1, delta, idx;
01174
01175 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01176 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01177 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01178 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01179 v = (val & 0x200) ? -1.0 : 1.0;
01180
01181 fcb->no_repeat_mask |= 3 << fcb->n;
01182 fcb->x[fcb->n] = idx - delta;
01183 fcb->y[fcb->n] = v;
01184 fcb->x[fcb->n + 1] = idx;
01185 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01186 fcb->n += 2;
01187 }
01188 }
01189
/**
 * Deterministic pseudo-random offset generator.
 *
 * @param frame_cntr frame counter of the decoder
 * @param block_num  block index within the frame
 * @param block_size number of samples that will be read from the offset
 * @return a value in [0, 1000 - block_size)
 */
01203 static int pRNG(int frame_cntr, int block_num, int block_size)
01204 {
01205 /* div_tbl[y] = { multiplier, constant } used for the value class y.
01206  * NOTE(review): the second column is written as k * C; the derivation
01207  * of these constants is not visible in this file -- treat them as
01208  * opaque and do not "simplify" them (e.g. the 0 * ... entry). */
01209
01210
01211
01212
01213
01214
01215 static const unsigned int div_tbl[9][2] = {
01216 { 8332, 3 * 715827883U },
01217 { 4545, 0 * 390451573U },
01218 { 3124, 11 * 268435456U },
01219 { 2380, 15 * 204522253U },
01220 { 1922, 23 * 165191050U },
01221 { 1612, 23 * 138547333U },
01222 { 1388, 27 * 119304648U },
01223 { 1219, 16 * 104755300U },
01224 { 1086, 39 * 93368855U }
01225 };
/* seed from block number and frame counter, folded back below 0xFFFF */
01226 unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
01227 if (x >= 0xFFFF) x -= 0xFFFF;
01228
/* y selects the div_tbl row; presumably x % 9, with MULH(477218589, x)
 * standing in for x / 9 -- TODO confirm against the MULH definition */
01229 y = x - 9 * MULH(477218589, x);
01230 z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
01231
01232 return z % (1000 - block_size);
01233 }
01234
01239 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01240 int block_idx, int size,
01241 const struct frame_type_desc *frame_desc,
01242 float *excitation)
01243 {
01244 float gain;
01245 int n, r_idx;
01246
01247 assert(size <= MAX_FRAMESIZE);
01248
01249
01250 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01251 r_idx = pRNG(s->frame_cntr, block_idx, size);
01252 gain = s->silence_gain;
01253 } else {
01254 r_idx = get_bits(gb, 8);
01255 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01256 }
01257
01258
01259 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01260
01261
01262 for (n = 0; n < size; n++)
01263 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01264 }
01265
/**
 * Generate the excitation of a block from pulses (FCB part) plus a
 * pitch-delayed copy of the excitation history (ACB part).
 *
 * @param s               decoder context
 * @param gb              bit reader
 * @param block_idx       block index within the frame
 * @param size            number of samples, at most MAX_FRAMESIZE / 2
 * @param block_pitch_sh2 block pitch with 2 fractional bits (pitch * 4)
 * @param frame_desc      description of the current frame type
 * @param excitation      output buffer; samples before excitation[0] are
 *                        read as history
 */
01270 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01271 int block_idx, int size,
01272 int block_pitch_sh2,
01273 const struct frame_type_desc *frame_desc,
01274 float *excitation)
01275 {
01276 static const float gain_coeff[6] = {
01277 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01278 };
01279 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01280 int n, idx, gain_weight;
01281 AMRFixed fcb;
01282
01283 assert(size <= MAX_FRAMESIZE / 2);
01284 memset(pulses, 0, sizeof(*pulses) * size);
01285
01286 fcb.pitch_lag = block_pitch_sh2 >> 2;
01287 fcb.pitch_fac = 1.0;
01288 fcb.no_repeat_mask = 0;
01289 fcb.n = 0;
01290
01291 /* collect the pulse positions/signs: either the two AW pulse sets, or
01292  * five tracks of explicitly coded pulses */
01293 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01294 aw_pulse_set1(s, gb, block_idx, &fcb);
01295 if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
01296 /* no position left for the second pulse set: fall back to
01297  * hardcoded-codebook noise at silence gain and skip the 8 bits
01298  * that would otherwise be consumed */
01299 int r_idx = pRNG(s->frame_cntr, block_idx, size);
01300
01301 for (n = 0; n < size; n++)
01302 excitation[n] =
01303 wmavoice_std_codebook[r_idx + n] * s->silence_gain;
01304 skip_bits(gb, 7 + 1);
01305 return;
01306 }
01307 } else {
01308 int offset_nbits = 5 - frame_desc->log_n_blocks;
01309
01310 fcb.no_repeat_mask = -1;
01311
01312 /* track n holds positions n, n + 5, n + 10, ...; the first dbl_pulses tracks carry a second pulse */
01313 for (n = 0; n < 5; n++) {
01314 float sign;
01315 int pos1, pos2;
01316
01317 sign = get_bits1(gb) ? 1.0 : -1.0;
01318 pos1 = get_bits(gb, offset_nbits);
01319 fcb.x[fcb.n] = n + 5 * pos1;
01320 fcb.y[fcb.n++] = sign;
01321 if (n < frame_desc->dbl_pulses) {
01322 pos2 = get_bits(gb, offset_nbits);
01323 fcb.x[fcb.n] = n + 5 * pos2;
01324 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01325 }
01326 }
01327 }
01328 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01329
01330 /* one 7-bit index selects both gains; the FCB gain additionally uses
01331  * a prediction from the six most recent prediction errors */
01332 idx = get_bits(gb, 7);
01333 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01334 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01335 acb_gain = wmavoice_gain_codebook_acb[idx];
01336 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01337 -2.9957322736 ,
01338 1.6094379124 );
01339
/* shift the error history and insert the new error gain_weight times (4, 2 or 1 for 2, 4 or 8 blocks per frame) */
01340 gain_weight = 8 >> frame_desc->log_n_blocks;
01341 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01342 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01343 for (n = 0; n < gain_weight; n++)
01344 s->gain_pred_err[n] = pred_err;
01345
01346 /* ACB part: copy the excitation history at the pitch delay */
01347 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01348 int len;
/* the pitch changes linearly over the frame (16.16 fixed point); each
 * pass handles the run of len samples that share one integer pitch and
 * one interpolation filter index */
01349 for (n = 0; n < size; n += len) {
01350 int next_idx_sh16;
01351 int abs_idx = block_idx * size + n;
01352 int pitch_sh16 = (s->last_pitch_val << 16) +
01353 s->pitch_diff_sh16 * abs_idx;
01354 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01355 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01356 idx = idx_sh16 >> 16;
01357 if (s->pitch_diff_sh16) {
01358 if (s->pitch_diff_sh16 > 0) {
01359 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01360 } else
01361 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01362 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01363 1, size - n);
01364 } else
01365 len = size;
01366
01367 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01368 wmavoice_ipol1_coeffs, 17,
01369 idx, 9, len);
01370 }
01371 } else {
/* one pitch for the whole block: interpolate if it has a fractional part, otherwise plain back-copy */
01372 int block_pitch = block_pitch_sh2 >> 2;
01373 idx = block_pitch_sh2 & 3;
01374 if (idx) {
01375 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01376 wmavoice_ipol2_coeffs, 4,
01377 idx, 8, size);
01378 } else
01379 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01380 sizeof(float) * size);
01381 }
01382
01383 /* excitation = acb_gain * excitation + fcb_gain * pulses */
01384 ff_weighted_vector_sumf(excitation, excitation, pulses,
01385 acb_gain, fcb_gain, size);
01386 }
01387
01404 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01405 int block_idx, int size,
01406 int block_pitch_sh2,
01407 const double *lsps, const double *prev_lsps,
01408 const struct frame_type_desc *frame_desc,
01409 float *excitation, float *synth)
01410 {
01411 double i_lsps[MAX_LSPS];
01412 float lpcs[MAX_LSPS];
01413 float fac;
01414 int n;
01415
01416 if (frame_desc->acb_type == ACB_TYPE_NONE)
01417 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01418 else
01419 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01420 frame_desc, excitation);
01421
01422
01423 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01424 for (n = 0; n < s->lsps; n++)
01425 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01426 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01427
01428
01429 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01430 }
01431
01447 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01448 float *samples,
01449 const double *lsps, const double *prev_lsps,
01450 float *excitation, float *synth)
01451 {
01452 WMAVoiceContext *s = ctx->priv_data;
01453 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01454 int pitch[MAX_BLOCKS], last_block_pitch;
01455
01456
01457 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01458
01459 if (bd_idx < 0) {
01460 av_log(ctx, AV_LOG_ERROR,
01461 "Invalid frame type VLC code, skipping\n");
01462 return -1;
01463 }
01464
01465 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01466
01467
01468 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01469
01470
01471
01472
01473 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01474 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01475 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01476 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01477 if (s->last_acb_type == ACB_TYPE_NONE ||
01478 20 * abs(cur_pitch_val - s->last_pitch_val) >
01479 (cur_pitch_val + s->last_pitch_val))
01480 s->last_pitch_val = cur_pitch_val;
01481
01482
01483 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01484 int fac = n * 2 + 1;
01485
01486 pitch[n] = (MUL16(fac, cur_pitch_val) +
01487 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01488 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01489 }
01490
01491
01492 s->pitch_diff_sh16 =
01493 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01494 }
01495
01496
01497 switch (frame_descs[bd_idx].fcb_type) {
01498 case FCB_TYPE_SILENCE:
01499 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01500 break;
01501 case FCB_TYPE_AW_PULSES:
01502 aw_parse_coords(s, gb, pitch);
01503 break;
01504 }
01505
01506 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01507 int bl_pitch_sh2;
01508
01509
01510 switch (frame_descs[bd_idx].acb_type) {
01511 case ACB_TYPE_HAMMING: {
01512
01513
01514
01515
01516
01517 int block_pitch,
01518 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01519 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01520 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01521
01522 if (n == 0) {
01523 block_pitch = get_bits(gb, s->block_pitch_nbits);
01524 } else
01525 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01526 get_bits(gb, s->block_delta_pitch_nbits);
01527
01528 last_block_pitch = av_clip(block_pitch,
01529 s->block_delta_pitch_hrange,
01530 s->block_pitch_range -
01531 s->block_delta_pitch_hrange);
01532
01533
01534 if (block_pitch < t1) {
01535 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01536 } else {
01537 block_pitch -= t1;
01538 if (block_pitch < t2) {
01539 bl_pitch_sh2 =
01540 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01541 } else {
01542 block_pitch -= t2;
01543 if (block_pitch < t3) {
01544 bl_pitch_sh2 =
01545 (s->block_conv_table[2] + block_pitch) << 2;
01546 } else
01547 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01548 }
01549 }
01550 pitch[n] = bl_pitch_sh2 >> 2;
01551 break;
01552 }
01553
01554 case ACB_TYPE_ASYMMETRIC: {
01555 bl_pitch_sh2 = pitch[n] << 2;
01556 break;
01557 }
01558
01559 default:
01560 bl_pitch_sh2 = 0;
01561 break;
01562 }
01563
01564 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01565 lsps, prev_lsps, &frame_descs[bd_idx],
01566 &excitation[n * block_nsamples],
01567 &synth[n * block_nsamples]);
01568 }
01569
01570
01571
01572 if (s->do_apf) {
01573 double i_lsps[MAX_LSPS];
01574 float lpcs[MAX_LSPS];
01575
01576 for (n = 0; n < s->lsps; n++)
01577 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01578 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01579 postfilter(s, synth, samples, 80, lpcs,
01580 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01581 frame_descs[bd_idx].fcb_type, pitch[0]);
01582
01583 for (n = 0; n < s->lsps; n++)
01584 i_lsps[n] = cos(lsps[n]);
01585 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01586 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01587 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01588 frame_descs[bd_idx].fcb_type, pitch[0]);
01589 } else
01590 memcpy(samples, synth, 160 * sizeof(synth[0]));
01591
01592
01593 s->frame_cntr++;
01594 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01595 s->last_acb_type = frame_descs[bd_idx].acb_type;
01596 switch (frame_descs[bd_idx].acb_type) {
01597 case ACB_TYPE_NONE:
01598 s->last_pitch_val = 0;
01599 break;
01600 case ACB_TYPE_ASYMMETRIC:
01601 s->last_pitch_val = cur_pitch_val;
01602 break;
01603 case ACB_TYPE_HAMMING:
01604 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01605 break;
01606 }
01607
01608 return 0;
01609 }
01610
/**
 * Force an LSP vector into a usable shape, in place.
 *
 * Two passes are made over the vector:
 *  -# clamp the first value to at least 0.0015 * pi, make every following
 *     value at least 0.0125 * pi larger than its predecessor, and clamp the
 *     last value to at most 0.9985 * pi;
 *  -# if any value is now smaller than its predecessor (the upper clamp can
 *     cause that), sort the whole vector into ascending order with a single
 *     insertion sort.
 *
 * @param lsps array of num values, modified in place
 * @param num  number of entries in lsps; must be at least 1
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double lowest  = 0.0015 * M_PI;
    const double highest = 0.9985 * M_PI;
    int idx, cur, slot;

    /* Pass 1: lower bound, minimum spacing, upper bound. */
    if (!(lsps[0] > lowest))
        lsps[0] = lowest;
    for (idx = 1; idx < num; idx++) {
        const double floor_val = lsps[idx - 1] + 0.0125 * M_PI;

        if (!(lsps[idx] > floor_val))
            lsps[idx] = floor_val;
    }
    if (lsps[num - 1] > highest)
        lsps[num - 1] = highest;

    /* Pass 2: look for the first out-of-order pair; nothing to do if the
     * vector is already ascending. */
    for (idx = 1; idx < num; idx++) {
        if (lsps[idx] < lsps[idx - 1])
            break;
    }
    if (idx >= num)
        return;

    /* Insertion sort over the complete vector. */
    for (cur = 1; cur < num; cur++) {
        const double key = lsps[cur];

        slot = cur;
        while (slot > 0 && !(lsps[slot - 1] <= key)) {
            lsps[slot] = lsps[slot - 1];
            slot--;
        }
        lsps[slot] = key;
    }
}
01651
01661 static int check_bits_for_superframe(GetBitContext *orig_gb,
01662 WMAVoiceContext *s)
01663 {
01664 GetBitContext s_gb, *gb = &s_gb;
01665 int n, need_bits, bd_idx;
01666 const struct frame_type_desc *frame_desc;
01667
01668
01669 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01670 skip_bits_long(gb, get_bits_count(orig_gb));
01671 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01672
01673
01674 if (get_bits_left(gb) < 14)
01675 return 1;
01676 if (!get_bits1(gb))
01677 return -1;
01678 if (get_bits1(gb)) skip_bits(gb, 12);
01679 if (s->has_residual_lsps) {
01680 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01681 return 1;
01682 skip_bits_long(gb, s->sframe_lsp_bitsize);
01683 }
01684
01685
01686 for (n = 0; n < MAX_FRAMES; n++) {
01687 int aw_idx_is_ext = 0;
01688
01689 if (!s->has_residual_lsps) {
01690 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01691 skip_bits_long(gb, s->frame_lsp_bitsize);
01692 }
01693 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01694 if (bd_idx < 0)
01695 return -1;
01696 frame_desc = &frame_descs[bd_idx];
01697 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01698 if (get_bits_left(gb) < s->pitch_nbits)
01699 return 1;
01700 skip_bits_long(gb, s->pitch_nbits);
01701 }
01702 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01703 skip_bits(gb, 8);
01704 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01705 int tmp = get_bits(gb, 6);
01706 if (tmp >= 0x36) {
01707 skip_bits(gb, 2);
01708 aw_idx_is_ext = 1;
01709 }
01710 }
01711
01712
01713 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01714 need_bits = s->block_pitch_nbits +
01715 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01716 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01717 need_bits = 2 * !aw_idx_is_ext;
01718 } else
01719 need_bits = 0;
01720 need_bits += frame_desc->frame_size;
01721 if (get_bits_left(gb) < need_bits)
01722 return 1;
01723 skip_bits_long(gb, need_bits);
01724 }
01725
01726 return 0;
01727 }
01728
01749 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01750 {
01751 WMAVoiceContext *s = ctx->priv_data;
01752 GetBitContext *gb = &s->gb, s_gb;
01753 int n, res, n_samples = 480;
01754 double lsps[MAX_FRAMES][MAX_LSPS];
01755 const double *mean_lsf = s->lsps == 16 ?
01756 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01757 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01758 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01759 float *samples;
01760
01761 memcpy(synth, s->synth_history,
01762 s->lsps * sizeof(*synth));
01763 memcpy(excitation, s->excitation_history,
01764 s->history_nsamples * sizeof(*excitation));
01765
01766 if (s->sframe_cache_size > 0) {
01767 gb = &s_gb;
01768 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01769 s->sframe_cache_size = 0;
01770 }
01771
01772 if ((res = check_bits_for_superframe(gb, s)) == 1) {
01773 *got_frame_ptr = 0;
01774 return 1;
01775 }
01776
01777
01778
01779
01780
01781 if (!get_bits1(gb)) {
01782 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01783 return -1;
01784 }
01785
01786
01787 if (get_bits1(gb)) {
01788 if ((n_samples = get_bits(gb, 12)) > 480) {
01789 av_log(ctx, AV_LOG_ERROR,
01790 "Superframe encodes >480 samples (%d), not allowed\n",
01791 n_samples);
01792 return -1;
01793 }
01794 }
01795
01796 if (s->has_residual_lsps) {
01797 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01798
01799 for (n = 0; n < s->lsps; n++)
01800 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01801
01802 if (s->lsps == 10) {
01803 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01804 } else
01805 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01806
01807 for (n = 0; n < s->lsps; n++) {
01808 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01809 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01810 lsps[2][n] += mean_lsf[n];
01811 }
01812 for (n = 0; n < 3; n++)
01813 stabilize_lsps(lsps[n], s->lsps);
01814 }
01815
01816
01817 s->frame.nb_samples = 480;
01818 if ((res = ff_get_buffer(ctx, &s->frame)) < 0) {
01819 av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01820 return res;
01821 }
01822 s->frame.nb_samples = n_samples;
01823 samples = (float *)s->frame.data[0];
01824
01825
01826 for (n = 0; n < 3; n++) {
01827 if (!s->has_residual_lsps) {
01828 int m;
01829
01830 if (s->lsps == 10) {
01831 dequant_lsp10i(gb, lsps[n]);
01832 } else
01833 dequant_lsp16i(gb, lsps[n]);
01834
01835 for (m = 0; m < s->lsps; m++)
01836 lsps[n][m] += mean_lsf[m];
01837 stabilize_lsps(lsps[n], s->lsps);
01838 }
01839
01840 if ((res = synth_frame(ctx, gb, n,
01841 &samples[n * MAX_FRAMESIZE],
01842 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01843 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01844 &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01845 *got_frame_ptr = 0;
01846 return res;
01847 }
01848 }
01849
01850
01851
01852
01853 if (get_bits1(gb)) {
01854 res = get_bits(gb, 4);
01855 skip_bits(gb, 10 * (res + 1));
01856 }
01857
01858 *got_frame_ptr = 1;
01859
01860
01861 memcpy(s->prev_lsps, lsps[2],
01862 s->lsps * sizeof(*s->prev_lsps));
01863 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01864 s->lsps * sizeof(*synth));
01865 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01866 s->history_nsamples * sizeof(*excitation));
01867 if (s->do_apf)
01868 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01869 s->history_nsamples * sizeof(*s->zero_exc_pf));
01870
01871 return 0;
01872 }
01873
01881 static int parse_packet_header(WMAVoiceContext *s)
01882 {
01883 GetBitContext *gb = &s->gb;
01884 unsigned int res;
01885
01886 if (get_bits_left(gb) < 11)
01887 return 1;
01888 skip_bits(gb, 4);
01889 s->has_residual_lsps = get_bits1(gb);
01890 do {
01891 res = get_bits(gb, 6);
01892
01893 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01894 return 1;
01895 } while (res == 0x3F);
01896 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01897
01898 return 0;
01899 }
01900
01916 static void copy_bits(PutBitContext *pb,
01917 const uint8_t *data, int size,
01918 GetBitContext *gb, int nbits)
01919 {
01920 int rmn_bytes, rmn_bits;
01921
01922 rmn_bits = rmn_bytes = get_bits_left(gb);
01923 if (rmn_bits < nbits)
01924 return;
01925 if (nbits > pb->size_in_bits - put_bits_count(pb))
01926 return;
01927 rmn_bits &= 7; rmn_bytes >>= 3;
01928 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01929 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01930 avpriv_copy_bits(pb, data + size - rmn_bytes,
01931 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01932 }
01933
01945 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01946 int *got_frame_ptr, AVPacket *avpkt)
01947 {
01948 WMAVoiceContext *s = ctx->priv_data;
01949 GetBitContext *gb = &s->gb;
01950 int size, res, pos;
01951
01952
01953
01954
01955
01956
01957 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01958 if (!size) {
01959 *got_frame_ptr = 0;
01960 return 0;
01961 }
01962 init_get_bits(&s->gb, avpkt->data, size << 3);
01963
01964
01965
01966
01967 if (size == ctx->block_align) {
01968 if ((res = parse_packet_header(s)) < 0)
01969 return res;
01970
01971
01972
01973
01974 if (s->spillover_nbits > 0) {
01975 if (s->sframe_cache_size > 0) {
01976 int cnt = get_bits_count(gb);
01977 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01978 flush_put_bits(&s->pb);
01979 s->sframe_cache_size += s->spillover_nbits;
01980 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01981 *got_frame_ptr) {
01982 cnt += s->spillover_nbits;
01983 s->skip_bits_next = cnt & 7;
01984 *(AVFrame *)data = s->frame;
01985 return cnt >> 3;
01986 } else
01987 skip_bits_long (gb, s->spillover_nbits - cnt +
01988 get_bits_count(gb));
01989 } else
01990 skip_bits_long(gb, s->spillover_nbits);
01991 }
01992 } else if (s->skip_bits_next)
01993 skip_bits(gb, s->skip_bits_next);
01994
01995
01996 s->sframe_cache_size = 0;
01997 s->skip_bits_next = 0;
01998 pos = get_bits_left(gb);
01999 if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
02000 return res;
02001 } else if (*got_frame_ptr) {
02002 int cnt = get_bits_count(gb);
02003 s->skip_bits_next = cnt & 7;
02004 *(AVFrame *)data = s->frame;
02005 return cnt >> 3;
02006 } else if ((s->sframe_cache_size = pos) > 0) {
02007
02008 init_get_bits(gb, avpkt->data, size << 3);
02009 skip_bits_long(gb, (size << 3) - pos);
02010 assert(get_bits_left(gb) == pos);
02011
02012
02013 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02014 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02015
02016
02017 }
02018
02019 return size;
02020 }
02021
02022 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02023 {
02024 WMAVoiceContext *s = ctx->priv_data;
02025
02026 if (s->do_apf) {
02027 ff_rdft_end(&s->rdft);
02028 ff_rdft_end(&s->irdft);
02029 ff_dct_end(&s->dct);
02030 ff_dct_end(&s->dst);
02031 }
02032
02033 return 0;
02034 }
02035
02036 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02037 {
02038 WMAVoiceContext *s = ctx->priv_data;
02039 int n;
02040
02041 s->postfilter_agc = 0;
02042 s->sframe_cache_size = 0;
02043 s->skip_bits_next = 0;
02044 for (n = 0; n < s->lsps; n++)
02045 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02046 memset(s->excitation_history, 0,
02047 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02048 memset(s->synth_history, 0,
02049 sizeof(*s->synth_history) * MAX_LSPS);
02050 memset(s->gain_pred_err, 0,
02051 sizeof(s->gain_pred_err));
02052
02053 if (s->do_apf) {
02054 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02055 sizeof(*s->synth_filter_out_buf) * s->lsps);
02056 memset(s->dcf_mem, 0,
02057 sizeof(*s->dcf_mem) * 2);
02058 memset(s->zero_exc_pf, 0,
02059 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02060 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02061 }
02062 }
02063
02064 AVCodec ff_wmavoice_decoder = {
02065 .name = "wmavoice",
02066 .type = AVMEDIA_TYPE_AUDIO,
02067 .id = CODEC_ID_WMAVOICE,
02068 .priv_data_size = sizeof(WMAVoiceContext),
02069 .init = wmavoice_decode_init,
02070 .close = wmavoice_decode_end,
02071 .decode = wmavoice_decode_packet,
02072 .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02073 .flush = wmavoice_flush,
02074 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02075 };