libavcodec/wmavoice.c
Go to the documentation of this file.
00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #define UNCHECKED_BITSTREAM_READER 1
00029 
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "internal.h"
00033 #include "get_bits.h"
00034 #include "put_bits.h"
00035 #include "wmavoice_data.h"
00036 #include "celp_math.h"
00037 #include "celp_filters.h"
00038 #include "acelp_vectors.h"
00039 #include "acelp_filters.h"
00040 #include "lsp.h"
00041 #include "libavutil/lzo.h"
00042 #include "dct.h"
00043 #include "rdft.h"
00044 #include "sinewin.h"
00045 
/* Compile-time size limits used to dimension the decoder context buffers. */
00046 #define MAX_BLOCKS           8   ///< maximum number of blocks per frame
00047 #define MAX_LSPS             16  ///< maximum filter order
00048 #define MAX_LSPS_ALIGN16     16  ///< same as #MAX_LSPS; needs to be a multiple of 16 (hence the name); offset of the output samples inside synth_filter_out_buf
00049 
00050 #define MAX_FRAMES           3   ///< maximum number of frames per superframe
00051 #define MAX_FRAMESIZE        160 ///< maximum number of samples per frame
00052 #define MAX_SIGNAL_HISTORY   416 ///< maximum excitation signal history
00053 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES) ///< maximum number of samples per superframe
00054 
00055 #define SFRAME_CACHE_MAXSIZE 256 ///< size in bytes (before padding) of WMAVoiceContext.sframe_cache; NOTE(review): the original comment was cut off after "frame data that" - restore from upstream
00056 
00057 #define VLC_NBITS            6   ///< number of bits to read per VLC iteration
00058 
/* Frame type VLC; its static table is set up by decode_vbmtree() through
 * INIT_VLC_STATIC with VLC_NBITS bits per lookup. */
00062 static VLC frame_type_vlc;
00063 
/* Values stored in frame_type_desc.acb_type and
 * WMAVoiceContext.last_acb_type. "ACB" presumably stands for "adaptive
 * codebook"; the code that interprets these values is not in this part of
 * the file - TODO confirm the per-value meaning against the frame decoder. */
00067 enum {
00068     ACB_TYPE_NONE       = 0, ///< initial value of WMAVoiceContext.last_acb_type; used by the first two frame types
00069     ACB_TYPE_ASYMMETRIC = 1, 
00070 
00071 
00072 
00073 
00074     ACB_TYPE_HAMMING    = 2  
00075 
00076 
00077 };
00078 
/* Values stored in frame_type_desc.fcb_type and passed as fcb_type to the
 * postfilter code. "FCB" presumably stands for "fixed codebook" - TODO
 * confirm. The numeric order matters: postfilter() tests
 * fcb_type >= FCB_TYPE_AW_PULSES. */
00082 enum {
00083     FCB_TYPE_SILENCE    = 0, ///< wiener_denoise() computes no new filter for this type
00084 
00085 
00086     FCB_TYPE_HARDCODED  = 1, ///< calc_input_response() uses gain factor 5/13 instead of 5/14.7
00087 
00088     FCB_TYPE_AW_PULSES  = 2, ///< lowest type for which postfilter() tries kalman_smoothen()
00089 
00090     FCB_TYPE_EXC_PULSES = 3, 
00091 
00092 
00093 };
00094 
/* Static description of each of the 17 frame types. decode_vbmtree() stores
 * the numbers 0..16 in the VBM tree; presumably those are indices into this
 * table - TODO confirm against the frame decoder. */
00098 static const struct frame_type_desc {
00099     uint8_t n_blocks;     ///< number of blocks: 1, 2, 4 or 8 (<= MAX_BLOCKS)
00100 
00101     uint8_t log_n_blocks; ///< log2(n_blocks) in every table entry
00102     uint8_t acb_type;     ///< one of the ACB_TYPE_* values
00103     uint8_t fcb_type;     ///< one of the FCB_TYPE_* values
00104     uint8_t dbl_pulses;   ///< 0, 2 or 5 in the table; meaning not visible here - TODO confirm
00105 
00106 
00107     uint16_t frame_size;  ///< NOTE(review): presumably the coded size of the frame in bits - confirm
00108 
00109 } frame_descs[17] = {
00110     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00111     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00112     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00113     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00114     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00115     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00116     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00117     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00118     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00119     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00120     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00121     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00122     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00123     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00124     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00125     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00126     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00127 };
00128 
/* WMA Voice decoder private context (AVCodecContext.priv_data).
 * Only fields whose use is visible in this part of the file are described;
 * the remaining ones are consumed by code further down. */
00132 typedef struct {
00137     AVFrame frame;                ///< frame published as AVCodecContext.coded_frame by the init function
00138     GetBitContext gb;             ///< bit reader; the init function uses it to parse the VBM tree from extradata
00139 
00140 
00141 
00142     int8_t vbm_tree[25];          ///< filled by decode_vbmtree(): frame type numbers grouped by 3-bit code, unused slots are 0xff (-1)
00143 
00144     int spillover_bitsize;        ///< 3 + ceil(log2(block_align)), set at init
00145 
00146 
00147     int history_nsamples;         ///< max_pitch_val + 8; init rejects values above MAX_SIGNAL_HISTORY
00148 
00149 
00150     /* postfilter specific values */
00151     int do_apf;                   ///< extradata flags bit 0; when set, init prepares the transforms and sin/cos tables
00152 
00153     int denoise_strength;         ///< extradata flags bits 2-5, valid range 0-11; row index into wmavoice_denoise_power_table
00154 
00155     int denoise_tilt_corr;        ///< extradata flag 0x40; enables tilt compensation in calc_input_response()
00156 
00157     int dc_level;                 ///< extradata flags bits 7-10; values > 8 enable the high-pass filter in postfilter()
00158 
00159 
00160     int lsps;                     ///< filter order: 16 if extradata flag 0x1000 is set, else 10
00161     int lsp_q_mode;               ///< extradata flag 0x2000
00162     int lsp_def_mode;             ///< extradata flag 0x4000
00163 
00164     int frame_lsp_bitsize;        ///< 34 for 16 LSPs, 24 for 10 LSPs
00165 
00166     int sframe_lsp_bitsize;       ///< 60 for 16 LSPs, 48 for 10 LSPs
00167 
00168 
00169     int min_pitch_val;            ///< derived from the sample rate at init; must be >= 1
00170     int max_pitch_val;            ///< derived from the sample rate at init
00171     int pitch_nbits;              ///< ceil(log2(max_pitch_val - min_pitch_val))
00172 
00173     int block_pitch_nbits;        ///< ceil(log2(block_pitch_range))
00174 
00175     int block_pitch_range;        ///< computed at init from block_conv_table[] and min_pitch_val
00176     int block_delta_pitch_nbits;  ///< 1 + ceil(log2(block_delta_pitch_hrange))
00177 
00178 
00179 
00180     int block_delta_pitch_hrange; ///< (pitch range >> 3) rounded down to a multiple of 16; must be > 0
00181 
00182     uint16_t block_conv_table[4]; ///< pitch conversion boundaries computed at init from min/max pitch and the pitch range
00183 
00184 
00194     int spillover_nbits;          
00195 
00196 
00197 
00198     int has_residual_lsps;        
00199 
00200 
00201 
00202 
00203     int skip_bits_next;           
00204 
00205 
00206 
00207     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00210     int sframe_cache_size;        
00211 
00212 
00213 
00214 
00215     PutBitContext pb;             
00216 
00226     double prev_lsps[MAX_LSPS];   ///< initialised to lsps values evenly spaced in (0, pi)
00227 
00228     int last_pitch_val;           ///< initialised to 40
00229     int last_acb_type;            ///< initialised to ACB_TYPE_NONE
00230     int pitch_diff_sh16;          
00231 
00232     float silence_gain;           
00233 
00234     int aw_idx_is_ext;            ///< set by aw_parse_coords(): 1 if the 6-bit pulse index was >= 54 and extended by 2 more bits
00235 
00236     int aw_pulse_range;           ///< set by aw_parse_coords(): 24 if both pitch values exceed 32, else 16
00237 
00238 
00239 
00240 
00241 
00242     int aw_n_pulses[2];           ///< per-block pulse count computed by aw_parse_coords()
00243 
00244 
00245     int aw_first_pulse_off[2];    ///< per-block offset of the first pulse computed by aw_parse_coords()
00246 
00247     int aw_next_pulse_off_cache;  ///< read by aw_pulse_set2() as the pulse offset for block 1
00248 
00249 
00250 
00251 
00252 
00253     int frame_cntr;               
00254 
00255     float gain_pred_err[6];       
00256     float excitation_history[MAX_SIGNAL_HISTORY];
00260     float synth_history[MAX_LSPS]; 
00261 
00270     RDFTContext rdft, irdft;      ///< 7-bit forward (DFT_R2C) and inverse (IDFT_C2R) real FFT, initialised only if do_apf
00271 
00272     DCTContext dct, dst;          ///< 6-bit DCT_I / DST_I, initialised only if do_apf
00273 
00274     float sin[511], cos[511];     ///< lookup tables indexed with 255 + clip(x, -255, 255); built at init only if do_apf
00275 
00276     float postfilter_agc;         ///< gain memory passed to adaptive_gain_control()
00277 
00278     float dcf_mem[2];             ///< filter memory for the order-2 high-pass in postfilter()
00279     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00282     float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter output tail carried over between wiener_denoise() calls
00283     int   denoise_filter_cache_size; ///< number of valid samples in denoise_filter_cache
00284     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< scratch buffer used by wiener_denoise()
00286     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< denoise filter coefficients computed by calc_input_response()
00288     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() output; the first MAX_LSPS_ALIGN16 entries hold filter history
00291 
00294 } WMAVoiceContext;
00295 
00305 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00306 {
00307     static const uint8_t bits[] = {
00308          2,  2,  2,  4,  4,  4,
00309          6,  6,  6,  8,  8,  8,
00310         10, 10, 10, 12, 12, 12,
00311         14, 14, 14, 14
00312     };
00313     static const uint16_t codes[] = {
00314           0x0000, 0x0001, 0x0002,        //              00/01/10
00315           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00316           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00317           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00318           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00319           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00320           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00321     };
00322     int cntr[8], n, res;
00323 
00324     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00325     memset(cntr,     0,    sizeof(cntr));
00326     for (n = 0; n < 17; n++) {
00327         res = get_bits(gb, 3);
00328         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00329             return -1;
00330         vbm_tree[res * 3 + cntr[res]++] = n;
00331     }
00332     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00333                     bits, 1, 1, codes, 2, 2, 132);
00334     return 0;
00335 }
00336 
00340 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00341 {
00342     int n, flags, pitch_range, lsp16_flag;
00343     WMAVoiceContext *s = ctx->priv_data;
00344 
00353     if (ctx->extradata_size != 46) {
00354         av_log(ctx, AV_LOG_ERROR,
00355                "Invalid extradata size %d (should be 46)\n",
00356                ctx->extradata_size);
00357         return -1;
00358     }
00359     flags                = AV_RL32(ctx->extradata + 18);
00360     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00361     s->do_apf            =    flags & 0x1;
00362     if (s->do_apf) {
00363         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00364         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00365         ff_dct_init(&s->dct,  6, DCT_I);
00366         ff_dct_init(&s->dst,  6, DST_I);
00367 
00368         ff_sine_window_init(s->cos, 256);
00369         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00370         for (n = 0; n < 255; n++) {
00371             s->sin[n]       = -s->sin[510 - n];
00372             s->cos[510 - n] =  s->cos[n];
00373         }
00374     }
00375     s->denoise_strength  =   (flags >> 2) & 0xF;
00376     if (s->denoise_strength >= 12) {
00377         av_log(ctx, AV_LOG_ERROR,
00378                "Invalid denoise filter strength %d (max=11)\n",
00379                s->denoise_strength);
00380         return -1;
00381     }
00382     s->denoise_tilt_corr = !!(flags & 0x40);
00383     s->dc_level          =   (flags >> 7) & 0xF;
00384     s->lsp_q_mode        = !!(flags & 0x2000);
00385     s->lsp_def_mode      = !!(flags & 0x4000);
00386     lsp16_flag           =    flags & 0x1000;
00387     if (lsp16_flag) {
00388         s->lsps               = 16;
00389         s->frame_lsp_bitsize  = 34;
00390         s->sframe_lsp_bitsize = 60;
00391     } else {
00392         s->lsps               = 10;
00393         s->frame_lsp_bitsize  = 24;
00394         s->sframe_lsp_bitsize = 48;
00395     }
00396     for (n = 0; n < s->lsps; n++)
00397         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00398 
00399     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00400     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00401         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00402         return -1;
00403     }
00404 
00405     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00406     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00407     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00408     if (pitch_range <= 0) {
00409         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00410         return -1;
00411     }
00412     s->pitch_nbits      = av_ceil_log2(pitch_range);
00413     s->last_pitch_val   = 40;
00414     s->last_acb_type    = ACB_TYPE_NONE;
00415     s->history_nsamples = s->max_pitch_val + 8;
00416 
00417     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00418         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00419             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00420 
00421         av_log(ctx, AV_LOG_ERROR,
00422                "Unsupported samplerate %d (min=%d, max=%d)\n",
00423                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00424 
00425         return -1;
00426     }
00427 
00428     s->block_conv_table[0]      = s->min_pitch_val;
00429     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00430     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00431     s->block_conv_table[3]      = s->max_pitch_val - 1;
00432     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00433     if (s->block_delta_pitch_hrange <= 0) {
00434         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00435         return -1;
00436     }
00437     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00438     s->block_pitch_range        = s->block_conv_table[2] +
00439                                   s->block_conv_table[3] + 1 +
00440                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00441     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00442 
00443     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00444 
00445     avcodec_get_frame_defaults(&s->frame);
00446     ctx->coded_frame = &s->frame;
00447 
00448     return 0;
00449 }
00450 
/**
 * Adaptive gain control, used in the postfilter.
 *
 * Scales the postfiltered signal so that its level follows that of the
 * original synthesized speech. The gain is smoothed with a one-pole
 * recursion: mem = alpha * mem + (1 - alpha) * E(speech) / E(in), where
 * E() is the sum of absolute sample values over the block.
 *
 * If the postfiltered input has zero energy, the per-block gain term is
 * taken as 0 instead of dividing by zero. The output is then all zeros and
 * the gain memory decays by alpha per sample, rather than becoming
 * infinite/NaN and corrupting all following blocks.
 *
 * @param out          output buffer of size samples (may equal in)
 * @param in           postfiltered input signal
 * @param speech_synth reference signal whose level is to be matched
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     gain state carried between calls; updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* guard against division by zero for an all-zero input block */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00493 
00512 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00513                            const float *in, float *out, int size)
00514 {
00515     int n;
00516     float optimal_gain = 0, dot;
00517     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00518                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00519                 *best_hist_ptr;
00520 
00521     /* find best fitting point in history */
00522     do {
00523         dot = ff_dot_productf(in, ptr, size);
00524         if (dot > optimal_gain) {
00525             optimal_gain  = dot;
00526             best_hist_ptr = ptr;
00527         }
00528     } while (--ptr >= end);
00529 
00530     if (optimal_gain <= 0)
00531         return -1;
00532     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00533     if (dot <= 0) // would be 1.0
00534         return -1;
00535 
00536     if (optimal_gain <= dot) {
00537         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00538     } else
00539         dot = 0.625;
00540 
00541     /* actual smoothing */
00542     for (n = 0; n < size; n++)
00543         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00544 
00545     return 0;
00546 }
00547 
/**
 * Compute the tilt factor of a set of LPCs: the ratio between the lag-1 and
 * the lag-0 autocorrelation of the sequence {1, lpcs[0], ..., lpcs[n-1]}.
 *
 * @param lpcs   filter coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return lag-1 autocorrelation divided by lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float energy = 1.0     + ff_dot_productf(lpcs, lpcs,     n_lpcs);
    const float lag1   = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1 / energy;
}
00567 
/*
 * Derive the denoise filter coefficients from the (tilted) LPCs.
 *
 * s         context; uses the rdft/irdft/dct/dst transforms, the cos/sin
 *           lookup tables, denoise_strength and denoise_tilt_corr
 * lpcs      input filter, transformed and overwritten in place; indices
 *           0..127 are accessed (the caller passes a 0x80-float buffer)
 * fcb_type  FCB_TYPE_*; only selects the gain multiplier
 *           (5/13 for FCB_TYPE_HARDCODED, 5/14.7 otherwise)
 * coeffs    output filter, 128 floats; entries from remainder on are zero
 * remainder number of leading coefficients that are kept and normalized
 */
00571 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00572                                 int fcb_type, float *coeffs, int remainder)
00573 {
00574     float last_coeff, min = 15.0, max = -15.0;
00575     float irange, angle_mul, gain_mul, range, sq;
00576     int n, idx;
00577 
00578     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00579     s->rdft.rdft_calc(&s->rdft, lpcs);
      /* log_range(): store log10 of a power value and track the running
       * minimum and maximum over all values seen */
00580 #define log_range(var, assign) do { \
00581         float tmp = log10f(assign);  var = tmp; \
00582         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00583     } while (0)
      /* lpcs[1] is kept aside in last_coeff (and stored in lpcs[64] below)
       * because lpcs[1] itself is overwritten by the n = 1 iteration */
00584     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00585     for (n = 1; n < 64; n++)
00586         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00587                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00588     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00589 #undef log_range
00590     range    = max - min;
00591     lpcs[64] = last_coeff;
00592 
00593     /* Now, use this spectrum to pick out these frequencies with higher
00594      * (relative) power/energy (which we then take to be "not noise"),
00595      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00596      * These frequencies will be maintained, while others ("noise") will be
00597      * decreased in the filter output. */
00598     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00599     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00600                                                           (5.0 / 14.7));
00601     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00602     for (n = 0; n <= 64; n++) {
00603         float pwr;
00604 
          /* NOTE(review): if all spectrum values are equal, range is 0 and
           * irange is infinite - confirm this cannot happen in practice */
00605         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00606         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00607         lpcs[n] = angle_mul * pwr;
00608 
00609         /* 70.57 =~ 1/log10(1.0331663) */
00610         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00611         if (idx > 127) { // fallback if index falls outside table range
00612             coeffs[n] = wmavoice_energy_table[127] *
00613                         powf(1.0331663, idx - 127);
00614         } else
00615             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00616     }
00617 
00618     /* calculate the Hilbert transform of the gains, which we do (since this
00619      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00620      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00621      * "moment" of the LPCs in this filter. */
00622     s->dct.dct_calc(&s->dct, lpcs);
00623     s->dst.dct_calc(&s->dst, lpcs);
00624 
00625     /* Split out the coefficient indexes into phase/magnitude pairs */
00626     idx = 255 + av_clip(lpcs[64],               -255, 255);
00627     coeffs[0]  = coeffs[0]  * s->cos[idx];
00628     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00629     last_coeff = coeffs[64] * s->cos[idx];
      /* Walk n from 63 down to 1, two values per iteration: odd n use
       * -lpcs[64], even n use +lpcs[64]. Each step reads coeffs[n] and
       * writes coeffs[2n] and coeffs[2n+1], which is why the loop runs
       * downwards. */
00630     for (n = 63;; n--) {
00631         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00632         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00633         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00634 
00635         if (!--n) break;
00636 
00637         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00638         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00639         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00640     }
00641     coeffs[1] = last_coeff;
00642 
00643     /* move into real domain */
00644     s->irdft.rdft_calc(&s->irdft, coeffs);
00645 
00646     /* tilt correction and normalize scale */
00647     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00648     if (s->denoise_tilt_corr) {
00649         float tilt_mem = 0;
00650 
00651         coeffs[remainder - 1] = 0;
00652         ff_tilt_compensation(&tilt_mem,
00653                              -1.8 * tilt_factor(coeffs, remainder - 1),
00654                              coeffs, remainder);
00655     }
      /* scale so that the coefficient energy becomes (1/64)^2 */
00656     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00657     for (n = 0; n < remainder; n++)
00658         coeffs[n] *= sq;
00659 }
00660 
/*
 * Denoise filter stage of the postfilter.
 *
 * For non-silence frames a filter is derived from the LPCs
 * (calc_input_response()) and applied to synth_pf by multiplication in the
 * frequency domain. The part of the filter output that extends beyond size
 * samples is kept in s->denoise_filter_cache and added to the output of
 * following calls. For FCB_TYPE_SILENCE no filtering is done; only the
 * cached tail of earlier calls is added.
 *
 * s         context; uses the transforms, the scratch buffers
 *           tilted_lpcs_pf / denoise_coeffs_pf and the denoise filter cache
 * fcb_type  FCB_TYPE_* of the current frame
 * synth_pf  signal to filter, modified in place; indices 0..127 are written
 *           for non-silence frames even though only size samples are input
 * size      number of input samples
 * lpcs      s->lsps LPC coefficients
 */
00687 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00688                            float *synth_pf, int size,
00689                            const float *lpcs)
00690 {
      /* remainder is only assigned and only read when fcb_type is not
       * FCB_TYPE_SILENCE */
00691     int remainder, lim, n;
00692 
00693     if (fcb_type != FCB_TYPE_SILENCE) {
00694         float *tilted_lpcs = s->tilted_lpcs_pf,
00695               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00696 
          /* build {1, lpcs[0..lsps-1], 0, ...} padded to 128 entries and
           * apply tilt compensation to the first lsps + 2 of them */
00697         tilted_lpcs[0]           = 1.0;
00698         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00699         memset(&tilted_lpcs[s->lsps + 1], 0,
00700                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00701         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00702                              tilted_lpcs, s->lsps + 2);
00703 
00704         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00705          * size is applied to the next frame. All input beyond this is zero,
00706          * and thus all output beyond this will go towards zero, hence we can
00707          * limit to min(size-1, 127-size) as a performance consideration. */
00708         remainder = FFMIN(127 - size, size - 1);
00709         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00710 
00711         /* apply coefficients (in frequency spectrum domain), i.e. complex
00712          * number multiplication */
00713         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00714         s->rdft.rdft_calc(&s->rdft, synth_pf);
00715         s->rdft.rdft_calc(&s->rdft, coeffs);
          /* entries 0 and 1 are multiplied as plain real values; the other
           * bins are complex (re, im) pairs */
00716         synth_pf[0] *= coeffs[0];
00717         synth_pf[1] *= coeffs[1];
00718         for (n = 1; n < 64; n++) {
00719             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00720             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00721             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00722         }
00723         s->irdft.rdft_calc(&s->irdft, synth_pf);
00724     }
00725 
00726     /* merge filter output with the history of previous runs */
00727     if (s->denoise_filter_cache_size) {
00728         lim = FFMIN(s->denoise_filter_cache_size, size);
00729         for (n = 0; n < lim; n++)
00730             synth_pf[n] += s->denoise_filter_cache[n];
00731         s->denoise_filter_cache_size -= lim;
          /* shift the unconsumed part of the cache to the front; the new
           * cache size is 0 whenever lim < size, so nothing is moved then */
00732         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00733                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00734     }
00735 
00736     /* move remainder of filter output into a cache for future runs */
00737     if (fcb_type != FCB_TYPE_SILENCE) {
          /* add to the part of the cache that is still occupied, copy the
           * rest */
00738         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00739         for (n = 0; n < lim; n++)
00740             s->denoise_filter_cache[n] += synth_pf[size + n];
00741         if (lim < remainder) {
00742             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00743                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00744             s->denoise_filter_cache_size = remainder;
00745         }
00746     }
00747 }
00748 
00769 static void postfilter(WMAVoiceContext *s, const float *synth,
00770                        float *samples,    int size,
00771                        const float *lpcs, float *zero_exc_pf,
00772                        int fcb_type,      int pitch)
00773 {
00774     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00775           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00776           *synth_filter_in = zero_exc_pf;
00777 
00778     assert(size <= MAX_FRAMESIZE / 2);
00779 
00780     /* generate excitation from input signal */
00781     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00782 
00783     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00784         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00785         synth_filter_in = synth_filter_in_buf;
00786 
00787     /* re-synthesize speech after smoothening, and keep history */
00788     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00789                                  synth_filter_in, size, s->lsps);
00790     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00791            sizeof(synth_pf[0]) * s->lsps);
00792 
00793     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00794 
00795     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00796                           &s->postfilter_agc);
00797 
00798     if (s->dc_level > 8) {
00799         /* remove ultra-low frequency DC noise / highpass filter;
00800          * coefficients are identical to those used in SIPR decoding,
00801          * and very closely resemble those used in AMR-NB decoding. */
00802         ff_acelp_apply_order_2_transfer_function(samples, samples,
00803             (const float[2]) { -1.99997,      1.0 },
00804             (const float[2]) { -1.9330735188, 0.93589198496 },
00805             0.93980580475, s->dcf_mem, size);
00806     }
00807 }
/**
 * Dequantize LSPs by summing the contributions of several codebook stages.
 *
 * For each stage n the codebook vector number values[n] is looked up and
 * base_q[n] + mul_q[n] * entry is added to every output element. The
 * codebooks of all stages are stored back to back in table; stage n holds
 * sizes[n] vectors of num bytes each.
 *
 * @param lsps     output, num values (cleared before accumulation)
 * @param num      number of values per codebook vector
 * @param values   selected vector index for each stage
 * @param sizes    number of vectors in the codebook of each stage
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *vec   = codebook + values[stage] * num;
        const double  offset = base_q[stage];
        const double  scale  = mul_q[stage];
        int i;

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * vec[i];

        /* advance to the codebook of the next stage */
        codebook += sizes[stage] * num;
    }
}
00843 
00855 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00856 {
00857     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00858     static const double mul_lsf[4] = {
00859         5.2187144800e-3,    1.4626986422e-3,
00860         9.6179549166e-4,    1.1325736225e-3
00861     };
00862     static const double base_lsf[4] = {
00863         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00864         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00865     };
00866     uint16_t v[4];
00867 
00868     v[0] = get_bits(gb, 8);
00869     v[1] = get_bits(gb, 6);
00870     v[2] = get_bits(gb, 5);
00871     v[3] = get_bits(gb, 5);
00872 
00873     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00874                  mul_lsf, base_lsf);
00875 }
00876 
00881 static void dequant_lsp10r(GetBitContext *gb,
00882                            double *i_lsps, const double *old,
00883                            double *a1, double *a2, int q_mode)
00884 {
00885     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00886     static const double mul_lsf[3] = {
00887         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00888     };
00889     static const double base_lsf[3] = {
00890         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00891     };
00892     const float (*ipol_tab)[2][10] = q_mode ?
00893         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00894     uint16_t interpol, v[3];
00895     int n;
00896 
00897     dequant_lsp10i(gb, i_lsps);
00898 
00899     interpol = get_bits(gb, 5);
00900     v[0]     = get_bits(gb, 7);
00901     v[1]     = get_bits(gb, 6);
00902     v[2]     = get_bits(gb, 6);
00903 
00904     for (n = 0; n < 10; n++) {
00905         double delta = old[n] - i_lsps[n];
00906         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00907         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00908     }
00909 
00910     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00911                  mul_lsf, base_lsf);
00912 }
00913 
00917 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00918 {
00919     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00920     static const double mul_lsf[5] = {
00921         3.3439586280e-3,    6.9908173703e-4,
00922         3.3216608306e-3,    1.0334960326e-3,
00923         3.1899104283e-3
00924     };
00925     static const double base_lsf[5] = {
00926         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00927         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00928         M_PI * -1.29816e-1
00929     };
00930     uint16_t v[5];
00931 
00932     v[0] = get_bits(gb, 8);
00933     v[1] = get_bits(gb, 6);
00934     v[2] = get_bits(gb, 7);
00935     v[3] = get_bits(gb, 6);
00936     v[4] = get_bits(gb, 7);
00937 
00938     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00939                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00940     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00941                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00942     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00943                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00944 }
00945 
00950 static void dequant_lsp16r(GetBitContext *gb,
00951                            double *i_lsps, const double *old,
00952                            double *a1, double *a2, int q_mode)
00953 {
00954     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00955     static const double mul_lsf[3] = {
00956         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00957     };
00958     static const double base_lsf[3] = {
00959         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00960     };
00961     const float (*ipol_tab)[2][16] = q_mode ?
00962         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00963     uint16_t interpol, v[3];
00964     int n;
00965 
00966     dequant_lsp16i(gb, i_lsps);
00967 
00968     interpol = get_bits(gb, 5);
00969     v[0]     = get_bits(gb, 7);
00970     v[1]     = get_bits(gb, 7);
00971     v[2]     = get_bits(gb, 7);
00972 
00973     for (n = 0; n < 16; n++) {
00974         double delta = old[n] - i_lsps[n];
00975         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00976         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00977     }
00978 
00979     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00980                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00981     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00982                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00983     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00984                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00985 }
00986 
01000 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01001                             const int *pitch)
01002 {
01003     static const int16_t start_offset[94] = {
01004         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
01005          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
01006          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01007          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01008          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01009          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01010         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01011         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01012     };
01013     int bits, offset;
01014 
01015     /* position of pulse */
01016     s->aw_idx_is_ext = 0;
01017     if ((bits = get_bits(gb, 6)) >= 54) {
01018         s->aw_idx_is_ext = 1;
01019         bits += (bits - 54) * 3 + get_bits(gb, 2);
01020     }
01021 
01022     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01023      * the distribution of the pulses in each block contained in this frame. */
01024     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01025     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01026     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01027     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01028     offset                  += s->aw_n_pulses[0] * pitch[0];
01029     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01030     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01031 
01032     /* if continuing from a position before the block, reset position to
01033      * start of block (when corrected for the range over which it can be
01034      * spread in aw_pulse_set1()). */
01035     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01036         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01037             s->aw_first_pulse_off[1] -= pitch[1];
01038         if (start_offset[bits] < 0)
01039             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01040                 s->aw_first_pulse_off[0] -= pitch[0];
01041     }
01042 }
01043 
01052 static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01053                          int block_idx, AMRFixed *fcb)
01054 {
01055     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01056     uint16_t *use_mask = use_mask_mem + 2;
01057     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01058      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01059      * of idx are the position of the bit within a particular item in the
01060      * array (0 being the most significant bit, and 15 being the least
01061      * significant bit), and the remainder (>> 4) is the index in the
01062      * use_mask[]-array. This is faster and uses less memory than using a
01063      * 80-byte/80-int array. */
01064     int pulse_off = s->aw_first_pulse_off[block_idx],
01065         pulse_start, n, idx, range, aidx, start_off = 0;
01066 
01067     /* set offset of first pulse to within this block */
01068     if (s->aw_n_pulses[block_idx] > 0)
01069         while (pulse_off + s->aw_pulse_range < 1)
01070             pulse_off += fcb->pitch_lag;
01071 
01072     /* find range per pulse */
01073     if (s->aw_n_pulses[0] > 0) {
01074         if (block_idx == 0) {
01075             range = 32;
01076         } else /* block_idx = 1 */ {
01077             range = 8;
01078             if (s->aw_n_pulses[block_idx] > 0)
01079                 pulse_off = s->aw_next_pulse_off_cache;
01080         }
01081     } else
01082         range = 16;
01083     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01084 
01085     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01086      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01087      * we exclude that range from being pulsed again in this function. */
01088     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01089     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01090     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01091     if (s->aw_n_pulses[block_idx] > 0)
01092         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01093             int excl_range         = s->aw_pulse_range; // always 16 or 24
01094             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01095             int first_sh           = 16 - (idx & 15);
01096             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01097             excl_range            -= first_sh;
01098             if (excl_range >= 16) {
01099                 *use_mask_ptr++    = 0;
01100                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01101             } else
01102                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01103         }
01104 
01105     /* find the 'aidx'th offset that is not excluded */
01106     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01107     for (n = 0; n <= aidx; pulse_start++) {
01108         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01109         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01110             if (use_mask[0])      idx = 0x0F;
01111             else if (use_mask[1]) idx = 0x1F;
01112             else if (use_mask[2]) idx = 0x2F;
01113             else if (use_mask[3]) idx = 0x3F;
01114             else if (use_mask[4]) idx = 0x4F;
01115             else return -1;
01116             idx -= av_log2_16bit(use_mask[idx >> 4]);
01117         }
01118         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01119             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01120             n++;
01121             start_off = idx;
01122         }
01123     }
01124 
01125     fcb->x[fcb->n] = start_off;
01126     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01127     fcb->n++;
01128 
01129     /* set offset for next block, relative to start of that block */
01130     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01131     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01132     return 0;
01133 }
01134 
01142 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01143                           int block_idx, AMRFixed *fcb)
01144 {
01145     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01146     float v;
01147 
01148     if (s->aw_n_pulses[block_idx] > 0) {
01149         int n, v_mask, i_mask, sh, n_pulses;
01150 
01151         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01152             n_pulses = 3;
01153             v_mask   = 8;
01154             i_mask   = 7;
01155             sh       = 4;
01156         } else { // 4 pulses, 1:sign + 2:index each
01157             n_pulses = 4;
01158             v_mask   = 4;
01159             i_mask   = 3;
01160             sh       = 3;
01161         }
01162 
01163         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01164             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01165             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01166                                  s->aw_first_pulse_off[block_idx];
01167             while (fcb->x[fcb->n] < 0)
01168                 fcb->x[fcb->n] += fcb->pitch_lag;
01169             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01170                 fcb->n++;
01171         }
01172     } else {
01173         int num2 = (val & 0x1FF) >> 1, delta, idx;
01174 
01175         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01176         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01177         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01178         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01179         v = (val & 0x200) ? -1.0 : 1.0;
01180 
01181         fcb->no_repeat_mask |= 3 << fcb->n;
01182         fcb->x[fcb->n]       = idx - delta;
01183         fcb->y[fcb->n]       = v;
01184         fcb->x[fcb->n + 1]   = idx;
01185         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01186         fcb->n              += 2;
01187     }
01188 }
01189 
/**
 * Pseudo-random start offset into the standard codebook.
 *
 * @param frame_cntr running frame counter
 * @param block_num  index of the block inside the frame
 * @param block_size number of samples in the block
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* The generator computes
     *     y = (x % 9) * 5 + 6;
     *     z = (49995 * x) / y;
     * Because y takes only 9 distinct values, the division is replaced by
     * a table lookup and a multiply-high, rewriting z as
     *     z = x * (49995 / y) + x * ((49995 % y) / y)
     * Each row belongs to one value of y: the first entry is 49995 / y,
     * the second is (49995 % y) / y as a 32-bit fixed-point fraction. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int x, ninths, row, frac, z;

    x = block_num * 1877 + frame_cntr;
    /* max value of x is 8*1877+0xFFFE=0x13AA6, so a single subtraction
     * is effectively a modulo (%) */
    if (x >= 0xFFFF)
        x -= 0xFFFF;

    /* row = x % 9, using the high word of x * ceil(2^32 / 9) as x / 9 */
    ninths = (unsigned int) (((int64_t) 477218589 * (int) x) >> 32);
    row    = x - 9 * ninths;

    /* z = x * 49995 / (row * 5 + 6), truncated to 16 bits */
    frac = (unsigned int) (((uint64_t) x * div_tbl[row][1]) >> 32);
    z    = (uint16_t) (x * div_tbl[row][0] + frac);

    return z % (1000 - block_size);
}
01234 
01239 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01240                                  int block_idx, int size,
01241                                  const struct frame_type_desc *frame_desc,
01242                                  float *excitation)
01243 {
01244     float gain;
01245     int n, r_idx;
01246 
01247     assert(size <= MAX_FRAMESIZE);
01248 
01249     /* Set the offset from which we start reading wmavoice_std_codebook */
01250     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01251         r_idx = pRNG(s->frame_cntr, block_idx, size);
01252         gain  = s->silence_gain;
01253     } else /* FCB_TYPE_HARDCODED */ {
01254         r_idx = get_bits(gb, 8);
01255         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01256     }
01257 
01258     /* Clear gain prediction parameters */
01259     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01260 
01261     /* Apply gain to hardcoded codebook and use that as excitation signal */
01262     for (n = 0; n < size; n++)
01263         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01264 }
01265 
01270 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01271                                 int block_idx, int size,
01272                                 int block_pitch_sh2,
01273                                 const struct frame_type_desc *frame_desc,
01274                                 float *excitation)
01275 {
01276     static const float gain_coeff[6] = {
01277         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01278     };
01279     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01280     int n, idx, gain_weight;
01281     AMRFixed fcb;
01282 
01283     assert(size <= MAX_FRAMESIZE / 2);
01284     memset(pulses, 0, sizeof(*pulses) * size);
01285 
01286     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01287     fcb.pitch_fac      = 1.0;
01288     fcb.no_repeat_mask = 0;
01289     fcb.n              = 0;
01290 
01291     /* For the other frame types, this is where we apply the innovation
01292      * (fixed) codebook pulses of the speech signal. */
01293     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01294         aw_pulse_set1(s, gb, block_idx, &fcb);
01295         if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
01296             /* Conceal the block with silence and return.
01297              * Skip the correct amount of bits to read the next
01298              * block from the correct offset. */
01299             int r_idx = pRNG(s->frame_cntr, block_idx, size);
01300 
01301             for (n = 0; n < size; n++)
01302                 excitation[n] =
01303                     wmavoice_std_codebook[r_idx + n] * s->silence_gain;
01304             skip_bits(gb, 7 + 1);
01305             return;
01306         }
01307     } else /* FCB_TYPE_EXC_PULSES */ {
01308         int offset_nbits = 5 - frame_desc->log_n_blocks;
01309 
01310         fcb.no_repeat_mask = -1;
01311         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01312          * (instead of double) for a subset of pulses */
01313         for (n = 0; n < 5; n++) {
01314             float sign;
01315             int pos1, pos2;
01316 
01317             sign           = get_bits1(gb) ? 1.0 : -1.0;
01318             pos1           = get_bits(gb, offset_nbits);
01319             fcb.x[fcb.n]   = n + 5 * pos1;
01320             fcb.y[fcb.n++] = sign;
01321             if (n < frame_desc->dbl_pulses) {
01322                 pos2           = get_bits(gb, offset_nbits);
01323                 fcb.x[fcb.n]   = n + 5 * pos2;
01324                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01325             }
01326         }
01327     }
01328     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01329 
01330     /* Calculate gain for adaptive & fixed codebook signal.
01331      * see ff_amr_set_fixed_gain(). */
01332     idx = get_bits(gb, 7);
01333     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01334                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01335     acb_gain = wmavoice_gain_codebook_acb[idx];
01336     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01337                         -2.9957322736 /* log(0.05) */,
01338                          1.6094379124 /* log(5.0)  */);
01339 
01340     gain_weight = 8 >> frame_desc->log_n_blocks;
01341     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01342             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01343     for (n = 0; n < gain_weight; n++)
01344         s->gain_pred_err[n] = pred_err;
01345 
01346     /* Calculation of adaptive codebook */
01347     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01348         int len;
01349         for (n = 0; n < size; n += len) {
01350             int next_idx_sh16;
01351             int abs_idx    = block_idx * size + n;
01352             int pitch_sh16 = (s->last_pitch_val << 16) +
01353                              s->pitch_diff_sh16 * abs_idx;
01354             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01355             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01356             idx            = idx_sh16 >> 16;
01357             if (s->pitch_diff_sh16) {
01358                 if (s->pitch_diff_sh16 > 0) {
01359                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01360                 } else
01361                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01362                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01363                               1, size - n);
01364             } else
01365                 len = size;
01366 
01367             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01368                                   wmavoice_ipol1_coeffs, 17,
01369                                   idx, 9, len);
01370         }
01371     } else /* ACB_TYPE_HAMMING */ {
01372         int block_pitch = block_pitch_sh2 >> 2;
01373         idx             = block_pitch_sh2 & 3;
01374         if (idx) {
01375             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01376                                   wmavoice_ipol2_coeffs, 4,
01377                                   idx, 8, size);
01378         } else
01379             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01380                               sizeof(float) * size);
01381     }
01382 
01383     /* Interpolate ACB/FCB and use as excitation signal */
01384     ff_weighted_vector_sumf(excitation, excitation, pulses,
01385                             acb_gain, fcb_gain, size);
01386 }
01387 
01404 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01405                         int block_idx, int size,
01406                         int block_pitch_sh2,
01407                         const double *lsps, const double *prev_lsps,
01408                         const struct frame_type_desc *frame_desc,
01409                         float *excitation, float *synth)
01410 {
01411     double i_lsps[MAX_LSPS];
01412     float lpcs[MAX_LSPS];
01413     float fac;
01414     int n;
01415 
01416     if (frame_desc->acb_type == ACB_TYPE_NONE)
01417         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01418     else
01419         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01420                             frame_desc, excitation);
01421 
01422     /* convert interpolated LSPs to LPCs */
01423     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01424     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01425         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01426     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01427 
01428     /* Speech synthesis */
01429     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01430 }
01431 
01447 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01448                        float *samples,
01449                        const double *lsps, const double *prev_lsps,
01450                        float *excitation, float *synth)
01451 {
01452     WMAVoiceContext *s = ctx->priv_data;
01453     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01454     int pitch[MAX_BLOCKS], last_block_pitch;
01455 
01456     /* Parse frame type ("frame header"), see frame_descs */
01457     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01458 
01459     if (bd_idx < 0) {
01460         av_log(ctx, AV_LOG_ERROR,
01461                "Invalid frame type VLC code, skipping\n");
01462         return -1;
01463     }
01464 
01465     block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01466 
01467     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01468     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01469         /* Pitch is provided per frame, which is interpreted as the pitch of
01470          * the last sample of the last block of this frame. We can interpolate
01471          * the pitch of other blocks (and even pitch-per-sample) by gradually
01472          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01473         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01474         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01475         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01476         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01477         if (s->last_acb_type == ACB_TYPE_NONE ||
01478             20 * abs(cur_pitch_val - s->last_pitch_val) >
01479                 (cur_pitch_val + s->last_pitch_val))
01480             s->last_pitch_val = cur_pitch_val;
01481 
01482         /* pitch per block */
01483         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01484             int fac = n * 2 + 1;
01485 
01486             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01487                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01488                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01489         }
01490 
01491         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01492         s->pitch_diff_sh16 =
01493             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01494     }
01495 
01496     /* Global gain (if silence) and pitch-adaptive window coordinates */
01497     switch (frame_descs[bd_idx].fcb_type) {
01498     case FCB_TYPE_SILENCE:
01499         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01500         break;
01501     case FCB_TYPE_AW_PULSES:
01502         aw_parse_coords(s, gb, pitch);
01503         break;
01504     }
01505 
01506     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01507         int bl_pitch_sh2;
01508 
01509         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01510         switch (frame_descs[bd_idx].acb_type) {
01511         case ACB_TYPE_HAMMING: {
01512             /* Pitch is given per block. Per-block pitches are encoded as an
01513              * absolute value for the first block, and then delta values
01514              * relative to this value) for all subsequent blocks. The scale of
01515              * this pitch value is semi-logaritmic compared to its use in the
01516              * decoder, so we convert it to normal scale also. */
01517             int block_pitch,
01518                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01519                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01520                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01521 
01522             if (n == 0) {
01523                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01524             } else
01525                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01526                                  get_bits(gb, s->block_delta_pitch_nbits);
01527             /* Convert last_ so that any next delta is within _range */
01528             last_block_pitch = av_clip(block_pitch,
01529                                        s->block_delta_pitch_hrange,
01530                                        s->block_pitch_range -
01531                                            s->block_delta_pitch_hrange);
01532 
01533             /* Convert semi-log-style scale back to normal scale */
01534             if (block_pitch < t1) {
01535                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01536             } else {
01537                 block_pitch -= t1;
01538                 if (block_pitch < t2) {
01539                     bl_pitch_sh2 =
01540                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01541                 } else {
01542                     block_pitch -= t2;
01543                     if (block_pitch < t3) {
01544                         bl_pitch_sh2 =
01545                             (s->block_conv_table[2] + block_pitch) << 2;
01546                     } else
01547                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01548                 }
01549             }
01550             pitch[n] = bl_pitch_sh2 >> 2;
01551             break;
01552         }
01553 
01554         case ACB_TYPE_ASYMMETRIC: {
01555             bl_pitch_sh2 = pitch[n] << 2;
01556             break;
01557         }
01558 
01559         default: // ACB_TYPE_NONE has no pitch
01560             bl_pitch_sh2 = 0;
01561             break;
01562         }
01563 
01564         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01565                     lsps, prev_lsps, &frame_descs[bd_idx],
01566                     &excitation[n * block_nsamples],
01567                     &synth[n * block_nsamples]);
01568     }
01569 
01570     /* Averaging projection filter, if applicable. Else, just copy samples
01571      * from synthesis buffer */
01572     if (s->do_apf) {
01573         double i_lsps[MAX_LSPS];
01574         float lpcs[MAX_LSPS];
01575 
01576         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01577             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01578         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01579         postfilter(s, synth, samples, 80, lpcs,
01580                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01581                    frame_descs[bd_idx].fcb_type, pitch[0]);
01582 
01583         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01584             i_lsps[n] = cos(lsps[n]);
01585         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01586         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01587                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01588                    frame_descs[bd_idx].fcb_type, pitch[0]);
01589     } else
01590         memcpy(samples, synth, 160 * sizeof(synth[0]));
01591 
01592     /* Cache values for next frame */
01593     s->frame_cntr++;
01594     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01595     s->last_acb_type = frame_descs[bd_idx].acb_type;
01596     switch (frame_descs[bd_idx].acb_type) {
01597     case ACB_TYPE_NONE:
01598         s->last_pitch_val = 0;
01599         break;
01600     case ACB_TYPE_ASYMMETRIC:
01601         s->last_pitch_val = cur_pitch_val;
01602         break;
01603     case ACB_TYPE_HAMMING:
01604         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01605         break;
01606     }
01607 
01608     return 0;
01609 }
01610 
/**
 * Keep a set of LSF values inside a stable range and in ascending order.
 *
 * @param lsps LSF values, modified in place
 * @param num  number of values in lsps
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double lowest  = 0.0015 * M_PI;
    const double spacing = 0.0125 * M_PI;
    const double highest = 0.9985 * M_PI;
    int i, j, k;

    /* Enforce a minimum for the first value, a minimum distance between
     * neighbours and a maximum for the last value.
     * Very similar to ff_set_min_dist_lsf(), but in double. */
    lsps[0] = lsps[0] > lowest ? lsps[0] : lowest;
    for (i = 1; i < num; i++) {
        const double floor_val = lsps[i - 1] + spacing;

        lsps[i] = lsps[i] > floor_val ? lsps[i] : floor_val;
    }
    lsps[num - 1] = lsps[num - 1] > highest ? highest : lsps[num - 1];

    /* Capping the last value may have put it below its predecessor; look
     * for any pair that is out of order */
    for (i = 1; i < num; i++)
        if (lsps[i] < lsps[i - 1])
            break;
    if (i >= num)
        return;

    /* Out of order: one insertion-sort pass over the whole array.
     * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
    for (j = 1; j < num; j++) {
        const double key = lsps[j];

        k = j;
        while (k > 0 && !(lsps[k - 1] <= key)) {
            lsps[k] = lsps[k - 1];
            k--;
        }
        lsps[k] = key;
    }
}
01651 
01661 static int check_bits_for_superframe(GetBitContext *orig_gb,
01662                                      WMAVoiceContext *s)
01663 {
01664     GetBitContext s_gb, *gb = &s_gb;
01665     int n, need_bits, bd_idx;
01666     const struct frame_type_desc *frame_desc;
01667 
01668     /* initialize a copy */
01669     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01670     skip_bits_long(gb, get_bits_count(orig_gb));
01671     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01672 
01673     /* superframe header */
01674     if (get_bits_left(gb) < 14)
01675         return 1;
01676     if (!get_bits1(gb))
01677         return -1;                        // WMAPro-in-WMAVoice superframe
01678     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01679     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01680         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01681             return 1;
01682         skip_bits_long(gb, s->sframe_lsp_bitsize);
01683     }
01684 
01685     /* frames */
01686     for (n = 0; n < MAX_FRAMES; n++) {
01687         int aw_idx_is_ext = 0;
01688 
01689         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01690            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01691            skip_bits_long(gb, s->frame_lsp_bitsize);
01692         }
01693         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01694         if (bd_idx < 0)
01695             return -1;                   // invalid frame type VLC code
01696         frame_desc = &frame_descs[bd_idx];
01697         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01698             if (get_bits_left(gb) < s->pitch_nbits)
01699                 return 1;
01700             skip_bits_long(gb, s->pitch_nbits);
01701         }
01702         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01703             skip_bits(gb, 8);
01704         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01705             int tmp = get_bits(gb, 6);
01706             if (tmp >= 0x36) {
01707                 skip_bits(gb, 2);
01708                 aw_idx_is_ext = 1;
01709             }
01710         }
01711 
01712         /* blocks */
01713         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01714             need_bits = s->block_pitch_nbits +
01715                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01716         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01717             need_bits = 2 * !aw_idx_is_ext;
01718         } else
01719             need_bits = 0;
01720         need_bits += frame_desc->frame_size;
01721         if (get_bits_left(gb) < need_bits)
01722             return 1;
01723         skip_bits_long(gb, need_bits);
01724     }
01725 
01726     return 0;
01727 }
01728 
01749 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01750 {
01751     WMAVoiceContext *s = ctx->priv_data;
01752     GetBitContext *gb = &s->gb, s_gb;
01753     int n, res, n_samples = 480;
01754     double lsps[MAX_FRAMES][MAX_LSPS];
01755     const double *mean_lsf = s->lsps == 16 ?
01756         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01757     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01758     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01759     float *samples;
01760 
01761     memcpy(synth,      s->synth_history,
01762            s->lsps             * sizeof(*synth));
01763     memcpy(excitation, s->excitation_history,
01764            s->history_nsamples * sizeof(*excitation));
01765 
01766     if (s->sframe_cache_size > 0) {
01767         gb = &s_gb;
01768         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01769         s->sframe_cache_size = 0;
01770     }
01771 
01772     if ((res = check_bits_for_superframe(gb, s)) == 1) {
01773         *got_frame_ptr = 0;
01774         return 1;
01775     }
01776 
01777     /* First bit is speech/music bit, it differentiates between WMAVoice
01778      * speech samples (the actual codec) and WMAVoice music samples, which
01779      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01780      * the wild yet. */
01781     if (!get_bits1(gb)) {
01782         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01783         return -1;
01784     }
01785 
01786     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01787     if (get_bits1(gb)) {
01788         if ((n_samples = get_bits(gb, 12)) > 480) {
01789             av_log(ctx, AV_LOG_ERROR,
01790                    "Superframe encodes >480 samples (%d), not allowed\n",
01791                    n_samples);
01792             return -1;
01793         }
01794     }
01795     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01796     if (s->has_residual_lsps) {
01797         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01798 
01799         for (n = 0; n < s->lsps; n++)
01800             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01801 
01802         if (s->lsps == 10) {
01803             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01804         } else /* s->lsps == 16 */
01805             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01806 
01807         for (n = 0; n < s->lsps; n++) {
01808             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01809             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01810             lsps[2][n] += mean_lsf[n];
01811         }
01812         for (n = 0; n < 3; n++)
01813             stabilize_lsps(lsps[n], s->lsps);
01814     }
01815 
01816     /* get output buffer */
01817     s->frame.nb_samples = 480;
01818     if ((res = ff_get_buffer(ctx, &s->frame)) < 0) {
01819         av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01820         return res;
01821     }
01822     s->frame.nb_samples = n_samples;
01823     samples = (float *)s->frame.data[0];
01824 
01825     /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
01826     for (n = 0; n < 3; n++) {
01827         if (!s->has_residual_lsps) {
01828             int m;
01829 
01830             if (s->lsps == 10) {
01831                 dequant_lsp10i(gb, lsps[n]);
01832             } else /* s->lsps == 16 */
01833                 dequant_lsp16i(gb, lsps[n]);
01834 
01835             for (m = 0; m < s->lsps; m++)
01836                 lsps[n][m] += mean_lsf[m];
01837             stabilize_lsps(lsps[n], s->lsps);
01838         }
01839 
01840         if ((res = synth_frame(ctx, gb, n,
01841                                &samples[n * MAX_FRAMESIZE],
01842                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01843                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01844                                &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01845             *got_frame_ptr = 0;
01846             return res;
01847         }
01848     }
01849 
01850     /* Statistics? FIXME - we don't check for length, a slight overrun
01851      * will be caught by internal buffer padding, and anything else
01852      * will be skipped, not read. */
01853     if (get_bits1(gb)) {
01854         res = get_bits(gb, 4);
01855         skip_bits(gb, 10 * (res + 1));
01856     }
01857 
01858     *got_frame_ptr = 1;
01859 
01860     /* Update history */
01861     memcpy(s->prev_lsps,           lsps[2],
01862            s->lsps             * sizeof(*s->prev_lsps));
01863     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01864            s->lsps             * sizeof(*synth));
01865     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01866            s->history_nsamples * sizeof(*excitation));
01867     if (s->do_apf)
01868         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01869                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01870 
01871     return 0;
01872 }
01873 
01881 static int parse_packet_header(WMAVoiceContext *s)
01882 {
01883     GetBitContext *gb = &s->gb;
01884     unsigned int res;
01885 
01886     if (get_bits_left(gb) < 11)
01887         return 1;
01888     skip_bits(gb, 4);          // packet sequence number
01889     s->has_residual_lsps = get_bits1(gb);
01890     do {
01891         res = get_bits(gb, 6); // number of superframes per packet
01892                                // (minus first one if there is spillover)
01893         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01894             return 1;
01895     } while (res == 0x3F);
01896     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01897 
01898     return 0;
01899 }
01900 
01916 static void copy_bits(PutBitContext *pb,
01917                       const uint8_t *data, int size,
01918                       GetBitContext *gb, int nbits)
01919 {
01920     int rmn_bytes, rmn_bits;
01921 
01922     rmn_bits = rmn_bytes = get_bits_left(gb);
01923     if (rmn_bits < nbits)
01924         return;
01925     if (nbits > pb->size_in_bits - put_bits_count(pb))
01926         return;
01927     rmn_bits &= 7; rmn_bytes >>= 3;
01928     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01929         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01930     avpriv_copy_bits(pb, data + size - rmn_bytes,
01931                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01932 }
01933 
01945 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01946                                   int *got_frame_ptr, AVPacket *avpkt)
01947 {
01948     WMAVoiceContext *s = ctx->priv_data;
01949     GetBitContext *gb = &s->gb;
01950     int size, res, pos;
01951 
01952     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01953      * header at each ctx->block_align bytes. However, Libav's ASF demuxer
01954      * feeds us ASF packets, which may concatenate multiple "codec" packets
01955      * in a single "muxer" packet, so we artificially emulate that by
01956      * capping the packet size at ctx->block_align. */
01957     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01958     if (!size) {
01959         *got_frame_ptr = 0;
01960         return 0;
01961     }
01962     init_get_bits(&s->gb, avpkt->data, size << 3);
01963 
01964     /* size == ctx->block_align is used to indicate whether we are dealing with
01965      * a new packet or a packet of which we already read the packet header
01966      * previously. */
01967     if (size == ctx->block_align) { // new packet header
01968         if ((res = parse_packet_header(s)) < 0)
01969             return res;
01970 
01971         /* If the packet header specifies a s->spillover_nbits, then we want
01972          * to push out all data of the previous packet (+ spillover) before
01973          * continuing to parse new superframes in the current packet. */
01974         if (s->spillover_nbits > 0) {
01975             if (s->sframe_cache_size > 0) {
01976                 int cnt = get_bits_count(gb);
01977                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01978                 flush_put_bits(&s->pb);
01979                 s->sframe_cache_size += s->spillover_nbits;
01980                 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01981                     *got_frame_ptr) {
01982                     cnt += s->spillover_nbits;
01983                     s->skip_bits_next = cnt & 7;
01984                     *(AVFrame *)data = s->frame;
01985                     return cnt >> 3;
01986                 } else
01987                     skip_bits_long (gb, s->spillover_nbits - cnt +
01988                                     get_bits_count(gb)); // resync
01989             } else
01990                 skip_bits_long(gb, s->spillover_nbits);  // resync
01991         }
01992     } else if (s->skip_bits_next)
01993         skip_bits(gb, s->skip_bits_next);
01994 
01995     /* Try parsing superframes in current packet */
01996     s->sframe_cache_size = 0;
01997     s->skip_bits_next = 0;
01998     pos = get_bits_left(gb);
01999     if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
02000         return res;
02001     } else if (*got_frame_ptr) {
02002         int cnt = get_bits_count(gb);
02003         s->skip_bits_next = cnt & 7;
02004         *(AVFrame *)data = s->frame;
02005         return cnt >> 3;
02006     } else if ((s->sframe_cache_size = pos) > 0) {
02007         /* rewind bit reader to start of last (incomplete) superframe... */
02008         init_get_bits(gb, avpkt->data, size << 3);
02009         skip_bits_long(gb, (size << 3) - pos);
02010         assert(get_bits_left(gb) == pos);
02011 
02012         /* ...and cache it for spillover in next packet */
02013         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02014         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02015         // FIXME bad - just copy bytes as whole and add use the
02016         // skip_bits_next field
02017     }
02018 
02019     return size;
02020 }
02021 
02022 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02023 {
02024     WMAVoiceContext *s = ctx->priv_data;
02025 
02026     if (s->do_apf) {
02027         ff_rdft_end(&s->rdft);
02028         ff_rdft_end(&s->irdft);
02029         ff_dct_end(&s->dct);
02030         ff_dct_end(&s->dst);
02031     }
02032 
02033     return 0;
02034 }
02035 
02036 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02037 {
02038     WMAVoiceContext *s = ctx->priv_data;
02039     int n;
02040 
02041     s->postfilter_agc    = 0;
02042     s->sframe_cache_size = 0;
02043     s->skip_bits_next    = 0;
02044     for (n = 0; n < s->lsps; n++)
02045         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02046     memset(s->excitation_history, 0,
02047            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02048     memset(s->synth_history,      0,
02049            sizeof(*s->synth_history)      * MAX_LSPS);
02050     memset(s->gain_pred_err,      0,
02051            sizeof(s->gain_pred_err));
02052 
02053     if (s->do_apf) {
02054         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02055                sizeof(*s->synth_filter_out_buf) * s->lsps);
02056         memset(s->dcf_mem,              0,
02057                sizeof(*s->dcf_mem)              * 2);
02058         memset(s->zero_exc_pf,          0,
02059                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02060         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02061     }
02062 }
02063 
02064 AVCodec ff_wmavoice_decoder = {
02065     .name           = "wmavoice",
02066     .type           = AVMEDIA_TYPE_AUDIO,
02067     .id             = CODEC_ID_WMAVOICE,
02068     .priv_data_size = sizeof(WMAVoiceContext),
02069     .init           = wmavoice_decode_init,
02070     .close          = wmavoice_decode_end,
02071     .decode         = wmavoice_decode_packet,
02072     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02073     .flush     = wmavoice_flush,
02074     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02075 };