00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <math.h>
00025 #include <stdint.h>
00026 #include <string.h>
00027
00028 #include "libavutil/mathematics.h"
00029 #include "avcodec.h"
00030 #include "internal.h"
00031 #define BITSTREAM_READER_LE
00032 #include "get_bits.h"
00033 #include "dsputil.h"
00034
00035 #include "lsp.h"
00036 #include "celp_math.h"
00037 #include "acelp_vectors.h"
00038 #include "acelp_pitch_delay.h"
00039 #include "acelp_filters.h"
00040 #include "celp_filters.h"
00041
00042 #define MAX_SUBFRAME_COUNT 5
00043
00044 #include "sipr.h"
00045 #include "siprdata.h"
00046
/**
 * Static description of one SIPR mode: a few decoder constants plus the
 * widths (in bits) of every field that decode_parameters() reads from the
 * bitstream.
 */
typedef struct SiprModeParam {
    const char *mode_name;          ///< name printed in log messages
    uint16_t    bits_per_frame;     ///< bits consumed from one input packet by sipr_decode_frame()
    uint8_t     subframe_count;     ///< subframes decoded per frame
    uint8_t     frames_per_packet;  ///< frames decoded from one input packet
    float       pitch_sharp_factor; ///< factor handed to eval_ir() for pitch sharpening

    /* bitstream field widths */
    uint8_t number_of_fc_indexes;   ///< entries of fc_index_bits[] read per subframe
    uint8_t ma_predictor_bits;      ///< width of ma_pred_switch; 0 means the field is not read

    /** width of each of the five vq_indexes[] fields */
    uint8_t vq_indexes_bits[5];

    /** width of the pitch_delay[] field of each subframe */
    uint8_t pitch_delay_bits[5];

    uint8_t gp_index_bits;          ///< width of gp_index[]; 0 means the field is not read
    uint8_t fc_index_bits[10];      ///< width of each fc_indexes[][j] field
    uint8_t gc_index_bits;          ///< width of the gc_index[] field
} SiprModeParam;
00068
00069 static const SiprModeParam modes[MODE_COUNT] = {
00070 [MODE_16k] = {
00071 .mode_name = "16k",
00072 .bits_per_frame = 160,
00073 .subframe_count = SUBFRAME_COUNT_16k,
00074 .frames_per_packet = 1,
00075 .pitch_sharp_factor = 0.00,
00076
00077 .number_of_fc_indexes = 10,
00078 .ma_predictor_bits = 1,
00079 .vq_indexes_bits = {7, 8, 7, 7, 7},
00080 .pitch_delay_bits = {9, 6},
00081 .gp_index_bits = 4,
00082 .fc_index_bits = {4, 5, 4, 5, 4, 5, 4, 5, 4, 5},
00083 .gc_index_bits = 5
00084 },
00085
00086 [MODE_8k5] = {
00087 .mode_name = "8k5",
00088 .bits_per_frame = 152,
00089 .subframe_count = 3,
00090 .frames_per_packet = 1,
00091 .pitch_sharp_factor = 0.8,
00092
00093 .number_of_fc_indexes = 3,
00094 .ma_predictor_bits = 0,
00095 .vq_indexes_bits = {6, 7, 7, 7, 5},
00096 .pitch_delay_bits = {8, 5, 5},
00097 .gp_index_bits = 0,
00098 .fc_index_bits = {9, 9, 9},
00099 .gc_index_bits = 7
00100 },
00101
00102 [MODE_6k5] = {
00103 .mode_name = "6k5",
00104 .bits_per_frame = 232,
00105 .subframe_count = 3,
00106 .frames_per_packet = 2,
00107 .pitch_sharp_factor = 0.8,
00108
00109 .number_of_fc_indexes = 3,
00110 .ma_predictor_bits = 0,
00111 .vq_indexes_bits = {6, 7, 7, 7, 5},
00112 .pitch_delay_bits = {8, 5, 5},
00113 .gp_index_bits = 0,
00114 .fc_index_bits = {5, 5, 5},
00115 .gc_index_bits = 7
00116 },
00117
00118 [MODE_5k0] = {
00119 .mode_name = "5k0",
00120 .bits_per_frame = 296,
00121 .subframe_count = 5,
00122 .frames_per_packet = 2,
00123 .pitch_sharp_factor = 0.85,
00124
00125 .number_of_fc_indexes = 1,
00126 .ma_predictor_bits = 0,
00127 .vq_indexes_bits = {6, 7, 7, 7, 5},
00128 .pitch_delay_bits = {8, 5, 8, 5, 5},
00129 .gp_index_bits = 0,
00130 .fc_index_bits = {10},
00131 .gc_index_bits = 7
00132 }
00133 };
00134
/** Powers of one half: entry i holds 0.5^(i + 1), for i = 0..15. */
const float ff_pow_0_5[] = {
    1.0 /     2, 1.0 /     4, 1.0 /     8, 1.0 /    16,
    1.0 /    32, 1.0 /    64, 1.0 /   128, 1.0 /   256,
    1.0 /   512, 1.0 /  1024, 1.0 /  2048, 1.0 /  4096,
    1.0 /  8192, 1.0 / 16384, 1.0 / 32768, 1.0 / 65536
};
00141
/**
 * Assemble a 10-element vector from five 2-element codebook entries.
 *
 * @param out  destination, receives 5 * 2 floats
 * @param idx  five entry indexes, one per codebook
 * @param cbs  five codebooks; entry k of a codebook starts at offset 2 * k
 */
static void dequant(float *out, const int *idx, const float *cbs[])
{
    enum { VEC_DIM = 2, VEC_COUNT = 5 };
    int stage;

    for (stage = 0; stage < VEC_COUNT; stage++) {
        const float *entry = cbs[stage] + VEC_DIM * idx[stage];

        memcpy(out, entry, VEC_DIM * sizeof(*out));
        out += VEC_DIM;
    }
}
00152
00153 static void lsf_decode_fp(float *lsfnew, float *lsf_history,
00154 const SiprParameters *parm)
00155 {
00156 int i;
00157 float lsf_tmp[LP_FILTER_ORDER];
00158
00159 dequant(lsf_tmp, parm->vq_indexes, lsf_codebooks);
00160
00161 for (i = 0; i < LP_FILTER_ORDER; i++)
00162 lsfnew[i] = lsf_history[i] * 0.33 + lsf_tmp[i] + mean_lsf[i];
00163
00164 ff_sort_nearly_sorted_floats(lsfnew, LP_FILTER_ORDER - 1);
00165
00166
00167
00168 ff_set_min_dist_lsf(lsfnew, LSFQ_DIFF_MIN, LP_FILTER_ORDER - 1);
00169 lsfnew[9] = FFMIN(lsfnew[LP_FILTER_ORDER - 1], 1.3 * M_PI);
00170
00171 memcpy(lsf_history, lsf_tmp, LP_FILTER_ORDER * sizeof(*lsf_history));
00172
00173 for (i = 0; i < LP_FILTER_ORDER - 1; i++)
00174 lsfnew[i] = cos(lsfnew[i]);
00175 lsfnew[LP_FILTER_ORDER - 1] *= 6.153848 / M_PI;
00176 }
00177
00179 static void pitch_sharpening(int pitch_lag_int, float beta,
00180 float *fixed_vector)
00181 {
00182 int i;
00183
00184 for (i = pitch_lag_int; i < SUBFR_SIZE; i++)
00185 fixed_vector[i] += beta * fixed_vector[i - pitch_lag_int];
00186 }
00187
00193 static void decode_parameters(SiprParameters* parms, GetBitContext *pgb,
00194 const SiprModeParam *p)
00195 {
00196 int i, j;
00197
00198 if (p->ma_predictor_bits)
00199 parms->ma_pred_switch = get_bits(pgb, p->ma_predictor_bits);
00200
00201 for (i = 0; i < 5; i++)
00202 parms->vq_indexes[i] = get_bits(pgb, p->vq_indexes_bits[i]);
00203
00204 for (i = 0; i < p->subframe_count; i++) {
00205 parms->pitch_delay[i] = get_bits(pgb, p->pitch_delay_bits[i]);
00206 if (p->gp_index_bits)
00207 parms->gp_index[i] = get_bits(pgb, p->gp_index_bits);
00208
00209 for (j = 0; j < p->number_of_fc_indexes; j++)
00210 parms->fc_indexes[i][j] = get_bits(pgb, p->fc_index_bits[j]);
00211
00212 parms->gc_index[i] = get_bits(pgb, p->gc_index_bits);
00213 }
00214 }
00215
00216 static void sipr_decode_lp(float *lsfnew, const float *lsfold, float *Az,
00217 int num_subfr)
00218 {
00219 double lsfint[LP_FILTER_ORDER];
00220 int i,j;
00221 float t, t0 = 1.0 / num_subfr;
00222
00223 t = t0 * 0.5;
00224 for (i = 0; i < num_subfr; i++) {
00225 for (j = 0; j < LP_FILTER_ORDER; j++)
00226 lsfint[j] = lsfold[j] * (1 - t) + t * lsfnew[j];
00227
00228 ff_amrwb_lsp2lpc(lsfint, Az, LP_FILTER_ORDER);
00229 Az += LP_FILTER_ORDER;
00230 t += t0;
00231 }
00232 }
00233
00237 static void eval_ir(const float *Az, int pitch_lag, float *freq,
00238 float pitch_sharp_factor)
00239 {
00240 float tmp1[SUBFR_SIZE+1], tmp2[LP_FILTER_ORDER+1];
00241 int i;
00242
00243 tmp1[0] = 1.;
00244 for (i = 0; i < LP_FILTER_ORDER; i++) {
00245 tmp1[i+1] = Az[i] * ff_pow_0_55[i];
00246 tmp2[i ] = Az[i] * ff_pow_0_7 [i];
00247 }
00248 memset(tmp1 + 11, 0, 37 * sizeof(float));
00249
00250 ff_celp_lp_synthesis_filterf(freq, tmp2, tmp1, SUBFR_SIZE,
00251 LP_FILTER_ORDER);
00252
00253 pitch_sharpening(pitch_lag, pitch_sharp_factor, freq);
00254 }
00255
00259 static void convolute_with_sparse(float *out, const AMRFixed *pulses,
00260 const float *shape, int length)
00261 {
00262 int i, j;
00263
00264 memset(out, 0, length*sizeof(float));
00265 for (i = 0; i < pulses->n; i++)
00266 for (j = pulses->x[i]; j < length; j++)
00267 out[j] += pulses->y[i] * shape[j - pulses->x[i]];
00268 }
00269
00273 static void postfilter_5k0(SiprContext *ctx, const float *lpc, float *samples)
00274 {
00275 float buf[SUBFR_SIZE + LP_FILTER_ORDER];
00276 float *pole_out = buf + LP_FILTER_ORDER;
00277 float lpc_n[LP_FILTER_ORDER];
00278 float lpc_d[LP_FILTER_ORDER];
00279 int i;
00280
00281 for (i = 0; i < LP_FILTER_ORDER; i++) {
00282 lpc_d[i] = lpc[i] * ff_pow_0_75[i];
00283 lpc_n[i] = lpc[i] * ff_pow_0_5 [i];
00284 };
00285
00286 memcpy(pole_out - LP_FILTER_ORDER, ctx->postfilter_mem,
00287 LP_FILTER_ORDER*sizeof(float));
00288
00289 ff_celp_lp_synthesis_filterf(pole_out, lpc_d, samples, SUBFR_SIZE,
00290 LP_FILTER_ORDER);
00291
00292 memcpy(ctx->postfilter_mem, pole_out + SUBFR_SIZE - LP_FILTER_ORDER,
00293 LP_FILTER_ORDER*sizeof(float));
00294
00295 ff_tilt_compensation(&ctx->tilt_mem, 0.4, pole_out, SUBFR_SIZE);
00296
00297 memcpy(pole_out - LP_FILTER_ORDER, ctx->postfilter_mem5k0,
00298 LP_FILTER_ORDER*sizeof(*pole_out));
00299
00300 memcpy(ctx->postfilter_mem5k0, pole_out + SUBFR_SIZE - LP_FILTER_ORDER,
00301 LP_FILTER_ORDER*sizeof(*pole_out));
00302
00303 ff_celp_lp_zero_synthesis_filterf(samples, lpc_n, pole_out, SUBFR_SIZE,
00304 LP_FILTER_ORDER);
00305
00306 }
00307
00308 static void decode_fixed_sparse(AMRFixed *fixed_sparse, const int16_t *pulses,
00309 SiprMode mode, int low_gain)
00310 {
00311 int i;
00312
00313 switch (mode) {
00314 case MODE_6k5:
00315 for (i = 0; i < 3; i++) {
00316 fixed_sparse->x[i] = 3 * (pulses[i] & 0xf) + i;
00317 fixed_sparse->y[i] = pulses[i] & 0x10 ? -1 : 1;
00318 }
00319 fixed_sparse->n = 3;
00320 break;
00321 case MODE_8k5:
00322 for (i = 0; i < 3; i++) {
00323 fixed_sparse->x[2*i ] = 3 * ((pulses[i] >> 4) & 0xf) + i;
00324 fixed_sparse->x[2*i + 1] = 3 * ( pulses[i] & 0xf) + i;
00325
00326 fixed_sparse->y[2*i ] = (pulses[i] & 0x100) ? -1.0: 1.0;
00327
00328 fixed_sparse->y[2*i + 1] =
00329 (fixed_sparse->x[2*i + 1] < fixed_sparse->x[2*i]) ?
00330 -fixed_sparse->y[2*i ] : fixed_sparse->y[2*i];
00331 }
00332
00333 fixed_sparse->n = 6;
00334 break;
00335 case MODE_5k0:
00336 default:
00337 if (low_gain) {
00338 int offset = (pulses[0] & 0x200) ? 2 : 0;
00339 int val = pulses[0];
00340
00341 for (i = 0; i < 3; i++) {
00342 int index = (val & 0x7) * 6 + 4 - i*2;
00343
00344 fixed_sparse->y[i] = (offset + index) & 0x3 ? -1 : 1;
00345 fixed_sparse->x[i] = index;
00346
00347 val >>= 3;
00348 }
00349 fixed_sparse->n = 3;
00350 } else {
00351 int pulse_subset = (pulses[0] >> 8) & 1;
00352
00353 fixed_sparse->x[0] = ((pulses[0] >> 4) & 15) * 3 + pulse_subset;
00354 fixed_sparse->x[1] = ( pulses[0] & 15) * 3 + pulse_subset + 1;
00355
00356 fixed_sparse->y[0] = pulses[0] & 0x200 ? -1 : 1;
00357 fixed_sparse->y[1] = -fixed_sparse->y[0];
00358 fixed_sparse->n = 2;
00359 }
00360 break;
00361 }
00362 }
00363
00364 static void decode_frame(SiprContext *ctx, SiprParameters *params,
00365 float *out_data)
00366 {
00367 int i, j;
00368 int subframe_count = modes[ctx->mode].subframe_count;
00369 int frame_size = subframe_count * SUBFR_SIZE;
00370 float Az[LP_FILTER_ORDER * MAX_SUBFRAME_COUNT];
00371 float *excitation;
00372 float ir_buf[SUBFR_SIZE + LP_FILTER_ORDER];
00373 float lsf_new[LP_FILTER_ORDER];
00374 float *impulse_response = ir_buf + LP_FILTER_ORDER;
00375 float *synth = ctx->synth_buf + 16;
00376
00377 int t0_first = 0;
00378 AMRFixed fixed_cb;
00379
00380 memset(ir_buf, 0, LP_FILTER_ORDER * sizeof(float));
00381 lsf_decode_fp(lsf_new, ctx->lsf_history, params);
00382
00383 sipr_decode_lp(lsf_new, ctx->lsp_history, Az, subframe_count);
00384
00385 memcpy(ctx->lsp_history, lsf_new, LP_FILTER_ORDER * sizeof(float));
00386
00387 excitation = ctx->excitation + PITCH_DELAY_MAX + L_INTERPOL;
00388
00389 for (i = 0; i < subframe_count; i++) {
00390 float *pAz = Az + i*LP_FILTER_ORDER;
00391 float fixed_vector[SUBFR_SIZE];
00392 int T0,T0_frac;
00393 float pitch_gain, gain_code, avg_energy;
00394
00395 ff_decode_pitch_lag(&T0, &T0_frac, params->pitch_delay[i], t0_first, i,
00396 ctx->mode == MODE_5k0, 6);
00397
00398 if (i == 0 || (i == 2 && ctx->mode == MODE_5k0))
00399 t0_first = T0;
00400
00401 ff_acelp_interpolatef(excitation, excitation - T0 + (T0_frac <= 0),
00402 ff_b60_sinc, 6,
00403 2 * ((2 + T0_frac)%3 + 1), LP_FILTER_ORDER,
00404 SUBFR_SIZE);
00405
00406 decode_fixed_sparse(&fixed_cb, params->fc_indexes[i], ctx->mode,
00407 ctx->past_pitch_gain < 0.8);
00408
00409 eval_ir(pAz, T0, impulse_response, modes[ctx->mode].pitch_sharp_factor);
00410
00411 convolute_with_sparse(fixed_vector, &fixed_cb, impulse_response,
00412 SUBFR_SIZE);
00413
00414 avg_energy =
00415 (0.01 + ff_dot_productf(fixed_vector, fixed_vector, SUBFR_SIZE))/
00416 SUBFR_SIZE;
00417
00418 ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0];
00419
00420 gain_code = ff_amr_set_fixed_gain(gain_cb[params->gc_index[i]][1],
00421 avg_energy, ctx->energy_history,
00422 34 - 15.0/(0.05*M_LN10/M_LN2),
00423 pred);
00424
00425 ff_weighted_vector_sumf(excitation, excitation, fixed_vector,
00426 pitch_gain, gain_code, SUBFR_SIZE);
00427
00428 pitch_gain *= 0.5 * pitch_gain;
00429 pitch_gain = FFMIN(pitch_gain, 0.4);
00430
00431 ctx->gain_mem = 0.7 * ctx->gain_mem + 0.3 * pitch_gain;
00432 ctx->gain_mem = FFMIN(ctx->gain_mem, pitch_gain);
00433 gain_code *= ctx->gain_mem;
00434
00435 for (j = 0; j < SUBFR_SIZE; j++)
00436 fixed_vector[j] = excitation[j] - gain_code * fixed_vector[j];
00437
00438 if (ctx->mode == MODE_5k0) {
00439 postfilter_5k0(ctx, pAz, fixed_vector);
00440
00441 ff_celp_lp_synthesis_filterf(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i*SUBFR_SIZE,
00442 pAz, excitation, SUBFR_SIZE,
00443 LP_FILTER_ORDER);
00444 }
00445
00446 ff_celp_lp_synthesis_filterf(synth + i*SUBFR_SIZE, pAz, fixed_vector,
00447 SUBFR_SIZE, LP_FILTER_ORDER);
00448
00449 excitation += SUBFR_SIZE;
00450 }
00451
00452 memcpy(synth - LP_FILTER_ORDER, synth + frame_size - LP_FILTER_ORDER,
00453 LP_FILTER_ORDER * sizeof(float));
00454
00455 if (ctx->mode == MODE_5k0) {
00456 for (i = 0; i < subframe_count; i++) {
00457 float energy = ff_dot_productf(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i*SUBFR_SIZE,
00458 ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i*SUBFR_SIZE,
00459 SUBFR_SIZE);
00460 ff_adaptive_gain_control(&synth[i * SUBFR_SIZE],
00461 &synth[i * SUBFR_SIZE], energy,
00462 SUBFR_SIZE, 0.9, &ctx->postfilter_agc);
00463 }
00464
00465 memcpy(ctx->postfilter_syn5k0, ctx->postfilter_syn5k0 + frame_size,
00466 LP_FILTER_ORDER*sizeof(float));
00467 }
00468 memmove(ctx->excitation, excitation - PITCH_DELAY_MAX - L_INTERPOL,
00469 (PITCH_DELAY_MAX + L_INTERPOL) * sizeof(float));
00470
00471 ff_acelp_apply_order_2_transfer_function(out_data, synth,
00472 (const float[2]) {-1.99997 , 1.000000000},
00473 (const float[2]) {-1.93307352, 0.935891986},
00474 0.939805806,
00475 ctx->highpass_filt_mem,
00476 frame_size);
00477 }
00478
00479 static av_cold int sipr_decoder_init(AVCodecContext * avctx)
00480 {
00481 SiprContext *ctx = avctx->priv_data;
00482 int i;
00483
00484 switch (avctx->block_align) {
00485 case 20: ctx->mode = MODE_16k; break;
00486 case 19: ctx->mode = MODE_8k5; break;
00487 case 29: ctx->mode = MODE_6k5; break;
00488 case 37: ctx->mode = MODE_5k0; break;
00489 default:
00490 if (avctx->bit_rate > 12200) ctx->mode = MODE_16k;
00491 else if (avctx->bit_rate > 7500 ) ctx->mode = MODE_8k5;
00492 else if (avctx->bit_rate > 5750 ) ctx->mode = MODE_6k5;
00493 else ctx->mode = MODE_5k0;
00494 av_log(avctx, AV_LOG_WARNING,
00495 "Invalid block_align: %d. Mode %s guessed based on bitrate: %d\n",
00496 avctx->block_align, modes[ctx->mode].mode_name, avctx->bit_rate);
00497 }
00498
00499 av_log(avctx, AV_LOG_DEBUG, "Mode: %s\n", modes[ctx->mode].mode_name);
00500
00501 if (ctx->mode == MODE_16k) {
00502 ff_sipr_init_16k(ctx);
00503 ctx->decode_frame = ff_sipr_decode_frame_16k;
00504 } else {
00505 ctx->decode_frame = decode_frame;
00506 }
00507
00508 for (i = 0; i < LP_FILTER_ORDER; i++)
00509 ctx->lsp_history[i] = cos((i+1) * M_PI / (LP_FILTER_ORDER + 1));
00510
00511 for (i = 0; i < 4; i++)
00512 ctx->energy_history[i] = -14;
00513
00514 avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00515
00516 avcodec_get_frame_defaults(&ctx->frame);
00517 avctx->coded_frame = &ctx->frame;
00518
00519 return 0;
00520 }
00521
00522 static int sipr_decode_frame(AVCodecContext *avctx, void *data,
00523 int *got_frame_ptr, AVPacket *avpkt)
00524 {
00525 SiprContext *ctx = avctx->priv_data;
00526 const uint8_t *buf=avpkt->data;
00527 SiprParameters parm;
00528 const SiprModeParam *mode_par = &modes[ctx->mode];
00529 GetBitContext gb;
00530 float *samples;
00531 int subframe_size = ctx->mode == MODE_16k ? L_SUBFR_16k : SUBFR_SIZE;
00532 int i, ret;
00533
00534 ctx->avctx = avctx;
00535 if (avpkt->size < (mode_par->bits_per_frame >> 3)) {
00536 av_log(avctx, AV_LOG_ERROR,
00537 "Error processing packet: packet size (%d) too small\n",
00538 avpkt->size);
00539 return -1;
00540 }
00541
00542
00543 ctx->frame.nb_samples = mode_par->frames_per_packet * subframe_size *
00544 mode_par->subframe_count;
00545 if ((ret = ff_get_buffer(avctx, &ctx->frame)) < 0) {
00546 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
00547 return ret;
00548 }
00549 samples = (float *)ctx->frame.data[0];
00550
00551 init_get_bits(&gb, buf, mode_par->bits_per_frame);
00552
00553 for (i = 0; i < mode_par->frames_per_packet; i++) {
00554 decode_parameters(&parm, &gb, mode_par);
00555
00556 ctx->decode_frame(ctx, &parm, samples);
00557
00558 samples += subframe_size * mode_par->subframe_count;
00559 }
00560
00561 *got_frame_ptr = 1;
00562 *(AVFrame *)data = ctx->frame;
00563
00564 return mode_par->bits_per_frame >> 3;
00565 }
00566
00567 AVCodec ff_sipr_decoder = {
00568 .name = "sipr",
00569 .type = AVMEDIA_TYPE_AUDIO,
00570 .id = CODEC_ID_SIPR,
00571 .priv_data_size = sizeof(SiprContext),
00572 .init = sipr_decoder_init,
00573 .decode = sipr_decode_frame,
00574 .capabilities = CODEC_CAP_DR1,
00575 .long_name = NULL_IF_CONFIG_SMALL("RealAudio SIPR / ACELP.NET"),
00576 };