/*
 * AMR narrowband decoder
 * Copyright (c) 2006-2007 Robert Swain
 * Copyright (c) 2009 Colin McQuillan
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <string.h>
#include <math.h>

#include "avcodec.h"
#include "get_bits.h"
#include "libavutil/common.h"
#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_filters.h"
#include "acelp_vectors.h"
#include "acelp_pitch_delay.h"
#include "lsp.h"

#include "amrnbdata.h"

#define AMR_BLOCK_SIZE    160     ///< samples per frame
#define AMR_SAMPLE_BOUND  32768.0 ///< threshold for synthesis overflow

#define AMR_SAMPLE_SCALE  (2.0 / 32768.0)

#define PRED_FAC_MODE_12k2 0.65

#define LSF_R_FAC               (8000.0 / 32768.0) ///< LSF residual tables to Hertz
#define MIN_LSF_SPACING         (50.0488 / 8000.0) ///< Ensures stability of LPC filter
#define PITCH_LAG_MIN_MODE_12k2 18                 ///< Lower bound on decoded lag search in 12.2kbit/s mode

#define MIN_ENERGY -14.0

#define SHARP_MAX 0.79449462890625

#define AMR_TILT_RESPONSE 22

#define AMR_TILT_GAMMA_T  0.8

#define AMR_AGC_ALPHA     0.9

typedef struct AMRContext {
    AMRNBFrame frame;
    uint8_t    bad_frame_indicator;
    enum Mode  cur_frame_mode;

    int16_t prev_lsf_r[LP_FILTER_ORDER];
    double  lsp[4][LP_FILTER_ORDER];
    double  prev_lsp_sub4[LP_FILTER_ORDER];

    float lsf_q[4][LP_FILTER_ORDER];
    float lsf_avg[LP_FILTER_ORDER];

    float lpc[4][LP_FILTER_ORDER];

    uint8_t pitch_lag_int;

    float excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE];
    float *excitation;

    float pitch_vector[AMR_SUBFRAME_SIZE];
    float fixed_vector[AMR_SUBFRAME_SIZE];

    float prediction_error[4];
    float pitch_gain[5];
    float fixed_gain[5];

    float   beta;
    uint8_t diff_count;
    uint8_t hang_count;

    float   prev_sparse_fixed_gain;
    uint8_t prev_ir_filter_nr;
    uint8_t ir_filter_onset;

    float postfilter_mem[10];
    float tilt_mem;
    float postfilter_agc;
    float high_pass_mem[2];

    float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE];

} AMRContext;

static void weighted_vector_sumd(double *out, const double *in_a,
                                 const double *in_b, double weight_coeff_a,
                                 double weight_coeff_b, int length)
{
    int i;

    for (i = 0; i < length; i++)
        out[i] = weight_coeff_a * in_a[i]
               + weight_coeff_b * in_b[i];
}

static av_cold int
amrnb_decode_init(AVCodecContext *avctx)
{
    AMRContext *p = avctx->priv_data;
    int i;

    avctx->sample_fmt = SAMPLE_FMT_FLT;

    // p->excitation always points to the same position in p->excitation_buf
    p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1];

    for (i = 0; i < LP_FILTER_ORDER; i++) {
        p->prev_lsp_sub4[i] = lsp_sub4_init[i] * 1000 / (float)(1 << 15);
        p->lsf_avg[i] = p->lsf_q[3][i] = lsp_avg_init[i] / (float)(1 << 15);
    }

    for (i = 0; i < 4; i++)
        p->prediction_error[i] = MIN_ENERGY;

    return 0;
}


static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf,
                                  int buf_size)
{
    GetBitContext gb;
    enum Mode mode;

    init_get_bits(&gb, buf, buf_size * 8);

    // Decode the first octet.
    skip_bits(&gb, 1);                        // padding bit
    mode = get_bits(&gb, 4);                  // frame type
    p->bad_frame_indicator = !get_bits1(&gb); // quality bit
    skip_bits(&gb, 2);                        // two padding bits

    if (mode < MODE_DTX) {
        uint16_t *data = (uint16_t *)&p->frame;
        const uint8_t *order = amr_unpacking_bitmaps_per_mode[mode];
        int field_size;

        memset(&p->frame, 0, sizeof(AMRNBFrame));
        buf++;
        while ((field_size = *order++)) {
            int field = 0;
            int field_offset = *order++;
            while (field_size--) {
                int bit = *order++;
                field <<= 1;
                field |= buf[bit >> 3] >> (bit & 7) & 1;
            }
            data[field_offset] = field;
        }
    }

    return mode;
}


static void lsf2lsp(const float *lsf, double *lsp)
{
    int i;

    for (i = 0; i < LP_FILTER_ORDER; i++)
        lsp[i] = cos(2.0 * M_PI * lsf[i]);
}

static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new)
{
    int i;

    for (i = 0; i < 4; i++)
        ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new,
                                0.25 * (3 - i), 0.25 * (i + 1),
                                LP_FILTER_ORDER);
}

static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER],
                                 const float lsf_no_r[LP_FILTER_ORDER],
                                 const int16_t *lsf_quantizer[5],
                                 const int quantizer_offset,
                                 const int sign, const int update)
{
    int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector
    float lsf_q[LP_FILTER_ORDER];   // quantized LSF vector
    int i;

    for (i = 0; i < LP_FILTER_ORDER >> 1; i++)
        memcpy(&lsf_r[i << 1], &lsf_quantizer[i][quantizer_offset],
               2 * sizeof(*lsf_r));

    if (sign) {
        lsf_r[4] *= -1;
        lsf_r[5] *= -1;
    }

    if (update)
        memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r));

    for (i = 0; i < LP_FILTER_ORDER; i++)
        lsf_q[i] = lsf_r[i] * (LSF_R_FAC / 8000.0) + lsf_no_r[i] * (1.0 / 8000.0);

    ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);

    if (update)
        interpolate_lsf(p->lsf_q, lsf_q);

    lsf2lsp(lsf_q, lsp);
}

static void lsf2lsp_5(AMRContext *p)
{
    const uint16_t *lsf_param = p->frame.lsf;
    float lsf_no_r[LP_FILTER_ORDER]; // LSFs without the residual vector
    const int16_t *lsf_quantizer[5];
    int i;

    lsf_quantizer[0] = lsf_5_1[lsf_param[0]];
    lsf_quantizer[1] = lsf_5_2[lsf_param[1]];
    lsf_quantizer[2] = lsf_5_3[lsf_param[2] >> 1];
    lsf_quantizer[3] = lsf_5_4[lsf_param[3]];
    lsf_quantizer[4] = lsf_5_5[lsf_param[4]];

    for (i = 0; i < LP_FILTER_ORDER; i++)
        lsf_no_r[i] = p->prev_lsf_r[i] * LSF_R_FAC * PRED_FAC_MODE_12k2 + lsf_5_mean[i];

    lsf2lsp_for_mode12k2(p, p->lsp[1], lsf_no_r, lsf_quantizer, 0, lsf_param[2] & 1, 0);
    lsf2lsp_for_mode12k2(p, p->lsp[3], lsf_no_r, lsf_quantizer, 2, lsf_param[2] & 1, 1);

    // interpolate LSP vectors at subframes 1 and 3
    weighted_vector_sumd(p->lsp[0], p->prev_lsp_sub4, p->lsp[1], 0.5, 0.5, LP_FILTER_ORDER);
    weighted_vector_sumd(p->lsp[2], p->lsp[1]       , p->lsp[3], 0.5, 0.5, LP_FILTER_ORDER);
}

static void lsf2lsp_3(AMRContext *p)
{
    const uint16_t *lsf_param = p->frame.lsf;
    int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector
    float lsf_q[LP_FILTER_ORDER];   // quantized LSF vector
    const int16_t *lsf_quantizer;
    int i, j;

    lsf_quantizer = (p->cur_frame_mode == MODE_7k95 ? lsf_3_1_MODE_7k95 : lsf_3_1)[lsf_param[0]];
    memcpy(lsf_r, lsf_quantizer, 3 * sizeof(*lsf_r));

    lsf_quantizer = lsf_3_2[lsf_param[1] << (p->cur_frame_mode <= MODE_5k15)];
    memcpy(lsf_r + 3, lsf_quantizer, 3 * sizeof(*lsf_r));

    lsf_quantizer = (p->cur_frame_mode <= MODE_5k15 ? lsf_3_3_MODE_5k15 : lsf_3_3)[lsf_param[2]];
    memcpy(lsf_r + 6, lsf_quantizer, 4 * sizeof(*lsf_r));

    // calculate mean-removed LSF vector and add mean
    for (i = 0; i < LP_FILTER_ORDER; i++)
        lsf_q[i] = (lsf_r[i] + p->prev_lsf_r[i] * pred_fac[i]) * (LSF_R_FAC / 8000.0) + lsf_3_mean[i] * (1.0 / 8000.0);

    ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);

    // store data for computing the next frame's LSFs
    interpolate_lsf(p->lsf_q, lsf_q);
    memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r));

    lsf2lsp(lsf_q, p->lsp[3]);

    // interpolate LSP vectors at subframes 1, 2 and 3
    for (i = 1; i <= 3; i++)
        for (j = 0; j < LP_FILTER_ORDER; j++)
            p->lsp[i - 1][j] = p->prev_lsp_sub4[j] +
                (p->lsp[3][j] - p->prev_lsp_sub4[j]) * 0.25 * i;
}


static void decode_pitch_lag_1_6(int *lag_int, int *lag_frac, int pitch_index,
                                 const int prev_lag_int, const int subframe)
{
    if (subframe == 0 || subframe == 2) {
        if (pitch_index < 463) {
            // 10923/2^16 is approximately 1/6, i.e. integer division by 6
            *lag_int  = (pitch_index + 107) * 10923 >> 16;
            *lag_frac = pitch_index - *lag_int * 6 + 105;
        } else {
            *lag_int  = pitch_index - 368;
            *lag_frac = 0;
        }
    } else {
        *lag_int  = ((pitch_index + 5) * 10923 >> 16) - 1;
        *lag_frac = pitch_index - *lag_int * 6 - 3;
        *lag_int += av_clip(prev_lag_int - 5, PITCH_LAG_MIN_MODE_12k2,
                            PITCH_DELAY_MAX - 9);
    }
}

static void decode_pitch_vector(AMRContext *p,
                                const AMRNBSubframe *amr_subframe,
                                const int subframe)
{
    int pitch_lag_int, pitch_lag_frac;
    enum Mode mode = p->cur_frame_mode;

    if (p->cur_frame_mode == MODE_12k2) {
        decode_pitch_lag_1_6(&pitch_lag_int, &pitch_lag_frac,
                             amr_subframe->p_lag, p->pitch_lag_int,
                             subframe);
    } else
        ff_decode_pitch_lag(&pitch_lag_int, &pitch_lag_frac,
                            amr_subframe->p_lag,
                            p->pitch_lag_int, subframe,
                            mode != MODE_4k75 && mode != MODE_5k15,
                            mode <= MODE_6k7 ? 4 : (mode == MODE_7k95 ?
                                                    5 : 6));

    p->pitch_lag_int = pitch_lag_int; // store previous lag in a uint8_t

    pitch_lag_frac <<= (p->cur_frame_mode != MODE_12k2);

    pitch_lag_int += pitch_lag_frac > 0;

    /* Calculate the pitch vector by interpolating the past excitation at the
       pitch lag using a b60 hamming windowed sinc function. */
    ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int,
                          ff_b60_sinc, 6,
                          pitch_lag_frac + 6 - 6 * (pitch_lag_frac > 0),
                          10, AMR_SUBFRAME_SIZE);

    memcpy(p->pitch_vector, p->excitation, AMR_SUBFRAME_SIZE * sizeof(float));
}


static void decode_10bit_pulse(int code, int pulse_position[8],
                               int i1, int i2, int i3)
{
    // coded using 7+3 bits with the 3 LSBs being, individually, the LSB of 1 of
    // the 3 pulses and the upper 7 bits being coded in base 5
    const uint8_t *positions = base_five_table[code >> 3];
    pulse_position[i1] = (positions[2] << 1) + ( code       & 1);
    pulse_position[i2] = (positions[1] << 1) + ((code >> 1) & 1);
    pulse_position[i3] = (positions[0] << 1) + ((code >> 2) & 1);
}

static void decode_8_pulses_31bits(const int16_t *fixed_index,
                                   AMRFixed *fixed_sparse)
{
    int pulse_position[8];
    int i, temp;

    decode_10bit_pulse(fixed_index[4], pulse_position, 0, 4, 1);
    decode_10bit_pulse(fixed_index[5], pulse_position, 2, 6, 5);

    // coded using 5+2 bits with the 2 LSBs being, individually, the LSB of 1 of
    // the 2 pulses and the upper 5 bits being coded in base 5
    temp = ((fixed_index[6] >> 2) * 25 + 12) >> 5;
    pulse_position[3] = temp % 5;
    pulse_position[7] = temp / 5;
    if (pulse_position[7] & 1)
        pulse_position[3] = 4 - pulse_position[3];
    pulse_position[3] = (pulse_position[3] << 1) + ( fixed_index[6]       & 1);
    pulse_position[7] = (pulse_position[7] << 1) + ((fixed_index[6] >> 1) & 1);

    fixed_sparse->n = 8;
    for (i = 0; i < 4; i++) {
        const int pos1   = (pulse_position[i]     << 2) + i;
        const int pos2   = (pulse_position[i + 4] << 2) + i;
        const float sign = fixed_index[i] ? -1.0 : 1.0;
        fixed_sparse->x[i    ] = pos1;
        fixed_sparse->x[i + 4] = pos2;
        fixed_sparse->y[i    ] = sign;
        fixed_sparse->y[i + 4] = pos2 < pos1 ? -sign : sign;
    }
}

static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulses,
                                const enum Mode mode, const int subframe)
{
    assert(MODE_4k75 <= mode && mode <= MODE_12k2);

    if (mode == MODE_12k2) {
        ff_decode_10_pulses_35bits(pulses, fixed_sparse, gray_decode, 5, 3);
    } else if (mode == MODE_10k2) {
        decode_8_pulses_31bits(pulses, fixed_sparse);
    } else {
        int *pulse_position = fixed_sparse->x;
        int i, pulse_subset;
        const int fixed_index = pulses[0];

        if (mode <= MODE_5k15) {
            pulse_subset      = ((fixed_index >> 3) & 8) + (subframe << 1);
            pulse_position[0] = ( fixed_index       & 7) * 5 + track_position[pulse_subset];
            pulse_position[1] = ((fixed_index >> 3) & 7) * 5 + track_position[pulse_subset + 1];
            fixed_sparse->n = 2;
        } else if (mode == MODE_5k9) {
            pulse_subset      = ((fixed_index & 1) << 1) + 1;
            pulse_position[0] = ((fixed_index >> 1) & 7) * 5 + pulse_subset;
            pulse_subset      = (fixed_index >> 4) & 3;
            pulse_position[1] = ((fixed_index >> 6) & 7) * 5 + pulse_subset + (pulse_subset == 3 ?
                                                                               1 : 0);
            fixed_sparse->n = pulse_position[0] == pulse_position[1] ? 1 : 2;
        } else if (mode == MODE_6k7) {
            pulse_position[0] = (fixed_index & 7) * 5;
            pulse_subset      = (fixed_index >> 2) & 2;
            pulse_position[1] = ((fixed_index >> 4) & 7) * 5 + pulse_subset + 1;
            pulse_subset      = (fixed_index >> 6) & 2;
            pulse_position[2] = ((fixed_index >> 8) & 7) * 5 + pulse_subset + 2;
            fixed_sparse->n = 3;
        } else { // mode <= MODE_7k95
            pulse_position[0] = gray_decode[ fixed_index        & 7];
            pulse_position[1] = gray_decode[(fixed_index >> 3)  & 7] + 1;
            pulse_position[2] = gray_decode[(fixed_index >> 6)  & 7] + 2;
            pulse_subset      = (fixed_index >> 9) & 1;
            pulse_position[3] = gray_decode[(fixed_index >> 10) & 7] + pulse_subset + 3;
            fixed_sparse->n = 4;
        }
        for (i = 0; i < fixed_sparse->n; i++)
            fixed_sparse->y[i] = (pulses[1] >> i) & 1 ? 1.0 : -1.0;
    }
}

static void pitch_sharpening(AMRContext *p, int subframe, enum Mode mode,
                             AMRFixed *fixed_sparse)
{
    // The spec suggests the current pitch gain is always used, but in other
    // modes the pitch and codebook gains are jointly quantized (sec 5.8.2)
    // so the codebook gain cannot depend on the quantized pitch gain.
    if (mode == MODE_12k2)
        p->beta = FFMIN(p->pitch_gain[4], 1.0);

    fixed_sparse->pitch_lag = p->pitch_lag_int;
    fixed_sparse->pitch_fac = p->beta;

    // Save pitch sharpening factor for the next subframe.
    // MODE_4k75 only updates on the 2nd and 4th subframes - this follows from
    // the fact that the gains for two subframes are jointly quantized.
    if (mode != MODE_4k75 || subframe & 1)
        p->beta = av_clipf(p->pitch_gain[4], 0.0, SHARP_MAX);
}


static float fixed_gain_smooth(AMRContext *p, const float *lsf,
                               const float *lsf_avg, const enum Mode mode)
{
    float diff = 0.0;
    int i;

    for (i = 0; i < LP_FILTER_ORDER; i++)
        diff += fabs(lsf_avg[i] - lsf[i]) / lsf_avg[i];

    // If diff is large for ten subframes, disable smoothing for a 40-subframe
    // hangover period.
    p->diff_count++;
    if (diff <= 0.65)
        p->diff_count = 0;

    if (p->diff_count > 10) {
        p->hang_count = 0;
        p->diff_count--; // don't let diff_count overflow
    }

    if (p->hang_count < 40) {
        p->hang_count++;
    } else if (mode < MODE_7k4 || mode == MODE_10k2) {
        const float smoothing_factor = av_clipf(4.0 * diff - 1.6, 0.0, 1.0);
        const float fixed_gain_mean = (p->fixed_gain[0] + p->fixed_gain[1] +
                                       p->fixed_gain[2] + p->fixed_gain[3] +
                                       p->fixed_gain[4]) * 0.2;
        return smoothing_factor * p->fixed_gain[4] +
               (1.0 - smoothing_factor) * fixed_gain_mean;
    }
    return p->fixed_gain[4];
}

static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe,
                         const enum Mode mode, const int subframe,
                         float *fixed_gain_factor)
{
    if (mode == MODE_12k2 || mode == MODE_7k95) {
        p->pitch_gain[4]   = qua_gain_pit [amr_subframe->p_gain]
            * (1.0 / 16384.0);
        *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain]
            * (1.0 /  2048.0);
    } else {
        const uint16_t *gains;

        if (mode >= MODE_6k7) {
            gains = gains_high[amr_subframe->p_gain];
        } else if (mode >= MODE_5k15) {
            gains = gains_low [amr_subframe->p_gain];
        } else {
            // gain index is only coded in subframes 0,2 for MODE_4k75
            gains = gains_MODE_4k75[(p->frame.subframe[subframe & 2].p_gain << 1) + (subframe & 1)];
        }

        p->pitch_gain[4]   = gains[0] * (1.0 / 16384.0);
        *fixed_gain_factor = gains[1] * (1.0 /  4096.0);
    }
}


static void apply_ir_filter(float *out, const AMRFixed *in,
                            const float *filter)
{
    float filter1[AMR_SUBFRAME_SIZE],
          filter2[AMR_SUBFRAME_SIZE];
    int   lag = in->pitch_lag;
    float fac = in->pitch_fac;
    int i;

    if (lag < AMR_SUBFRAME_SIZE) {
        ff_celp_circ_addf(filter1, filter, filter, lag, fac,
                          AMR_SUBFRAME_SIZE);

        if (lag < AMR_SUBFRAME_SIZE >> 1)
            ff_celp_circ_addf(filter2, filter, filter1, lag, fac,
                              AMR_SUBFRAME_SIZE);
    }

    memset(out, 0, sizeof(float) * AMR_SUBFRAME_SIZE);
    for (i = 0; i < in->n; i++) {
        int   x = in->x[i];
        float y = in->y[i];
        const float *filterp;

        if (x >= AMR_SUBFRAME_SIZE - lag) {
            filterp = filter;
        } else if (x >= AMR_SUBFRAME_SIZE - (lag << 1)) {
            filterp = filter1;
        } else
            filterp = filter2;

        ff_celp_circ_addf(out, out, filterp, x, y, AMR_SUBFRAME_SIZE);
    }
}

static const float *anti_sparseness(AMRContext *p, AMRFixed *fixed_sparse,
                                    const float *fixed_vector,
                                    float fixed_gain, float *out)
{
    int ir_filter_nr;

    if (p->pitch_gain[4] < 0.6) {
        ir_filter_nr = 0;      // strong filtering
    } else if (p->pitch_gain[4] < 0.9) {
        ir_filter_nr = 1;      // medium filtering
    } else
        ir_filter_nr = 2;      // no filtering

    // detect 'onset'
    if (fixed_gain > 2.0 * p->prev_sparse_fixed_gain) {
        p->ir_filter_onset = 2;
    } else if (p->ir_filter_onset)
        p->ir_filter_onset--;

    if (!p->ir_filter_onset) {
        int i, count = 0;

        for (i = 0; i < 5; i++)
            if (p->pitch_gain[i] < 0.6)
                count++;
        if (count > 2)
            ir_filter_nr = 0;

        if (ir_filter_nr > p->prev_ir_filter_nr + 1)
            ir_filter_nr--;
    } else if (ir_filter_nr < 2)
        ir_filter_nr++;

    // Disable filtering for very low level of fixed_gain.
    // Note this step is not specified in the technical description but is in
    // the reference source in the function Ph_disp.
    if (fixed_gain < 5.0)
        ir_filter_nr = 2;

    if (p->cur_frame_mode != MODE_7k4 && p->cur_frame_mode < MODE_10k2
            && ir_filter_nr < 2) {
        apply_ir_filter(out, fixed_sparse,
                        (p->cur_frame_mode == MODE_7k95 ?
                             ir_filters_lookup_MODE_7k95 :
                             ir_filters_lookup)[ir_filter_nr]);
        fixed_vector = out;
    }

    // update ir filter strength history
    p->prev_ir_filter_nr      = ir_filter_nr;
    p->prev_sparse_fixed_gain = fixed_gain;

    return fixed_vector;
}


static int synthesis(AMRContext *p, float *lpc,
                     float fixed_gain, const float *fixed_vector,
                     float *samples, uint8_t overflow)
{
    int i;
    float excitation[AMR_SUBFRAME_SIZE];

    // if an overflow has been detected, the pitch vector is scaled down by a
    // factor of 4
    if (overflow)
        for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
            p->pitch_vector[i] *= 0.25;

    ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
                            p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE);

    // emphasize pitch vector contribution
    if (p->pitch_gain[4] > 0.5 && !overflow) {
        float energy = ff_dot_productf(excitation, excitation,
                                       AMR_SUBFRAME_SIZE);
        float pitch_factor =
            p->pitch_gain[4] *
            (p->cur_frame_mode == MODE_12k2 ?
                0.25 * FFMIN(p->pitch_gain[4], 1.0) :
                0.5  * FFMIN(p->pitch_gain[4], SHARP_MAX));

        for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
            excitation[i] += pitch_factor * p->pitch_vector[i];

        ff_scale_vector_to_given_sum_of_squares(excitation, excitation, energy,
                                                AMR_SUBFRAME_SIZE);
    }

    ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE,
                                 LP_FILTER_ORDER);

    // detect overflow
    for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
        if (fabsf(samples[i]) > AMR_SAMPLE_BOUND) {
            return 1;
        }

    return 0;
}


static void update_state(AMRContext *p)
{
    memcpy(p->prev_lsp_sub4, p->lsp[3], LP_FILTER_ORDER * sizeof(p->lsp[3][0]));

    memmove(&p->excitation_buf[0], &p->excitation_buf[AMR_SUBFRAME_SIZE],
            (PITCH_DELAY_MAX + LP_FILTER_ORDER + 1) * sizeof(float));

    memmove(&p->pitch_gain[0], &p->pitch_gain[1], 4 * sizeof(float));
    memmove(&p->fixed_gain[0], &p->fixed_gain[1], 4 * sizeof(float));

    memmove(&p->samples_in[0], &p->samples_in[AMR_SUBFRAME_SIZE],
            LP_FILTER_ORDER * sizeof(float));
}


static float tilt_factor(float *lpc_n, float *lpc_d)
{
    float rh0, rh1; // autocorrelation at lag 0 and 1

    // LP_FILTER_ORDER prior zeros are needed for ff_celp_lp_synthesis_filterf
    float impulse_buffer[LP_FILTER_ORDER + AMR_TILT_RESPONSE] = { 0 };
    float *hf = impulse_buffer + LP_FILTER_ORDER; // start of impulse response

    hf[0] = 1.0;
    memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER);
    ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE,
                                 LP_FILTER_ORDER);

    rh0 = ff_dot_productf(hf, hf,     AMR_TILT_RESPONSE);
    rh1 = ff_dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1);

    // The spec only specifies this check for 12.2 and 10.2 kbit/s
    // modes. But in the ref source the tilt is always non-negative.
    return rh1 >= 0.0 ? rh1 / rh0 * AMR_TILT_GAMMA_T : 0.0;
}

static void postfilter(AMRContext *p, float *lpc, float *buf_out)
{
    int i;
    float *samples    = p->samples_in + LP_FILTER_ORDER; // Start of input

    float speech_gain = ff_dot_productf(samples, samples,
                                        AMR_SUBFRAME_SIZE);

    float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER];  // Output of pole filter
    const float *gamma_n, *gamma_d;                       // Formant filter factor table
    float lpc_n[LP_FILTER_ORDER], lpc_d[LP_FILTER_ORDER]; // Transfer function coefficients

    if (p->cur_frame_mode == MODE_12k2 || p->cur_frame_mode == MODE_10k2) {
        gamma_n = ff_pow_0_7;
        gamma_d = ff_pow_0_75;
    } else {
        gamma_n = ff_pow_0_55;
        gamma_d = ff_pow_0_7;
    }

    for (i = 0; i < LP_FILTER_ORDER; i++) {
        lpc_n[i] = lpc[i] * gamma_n[i];
        lpc_d[i] = lpc[i] * gamma_d[i];
    }

    memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER);
    ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples,
                                 AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);
    memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE,
           sizeof(float) * LP_FILTER_ORDER);

    ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n,
                                      pole_out + LP_FILTER_ORDER,
                                      AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);

    ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out,
                         AMR_SUBFRAME_SIZE);

    ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE,
                             AMR_AGC_ALPHA, &p->postfilter_agc);
}


static int amrnb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                              AVPacket *avpkt)
{

    AMRContext *p      = avctx->priv_data; // pointer to private data
    const uint8_t *buf = avpkt->data;
    int buf_size       = avpkt->size;
    float *buf_out     = data;             // pointer to the output data buffer
    int i, subframe;
    float fixed_gain_factor;
    AMRFixed fixed_sparse = {0};           // fixed vector up to anti-sparseness processing
    float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
    float synth_fixed_gain;                // the fixed gain that synthesis should use
    const float *synth_fixed_vector;       // pointer to the fixed vector that synthesis should use

    p->cur_frame_mode = unpack_bitstream(p, buf, buf_size);
    if (p->cur_frame_mode == MODE_DTX) {
        av_log_missing_feature(avctx, "dtx mode", 1);
        return -1;
    }

    if (p->cur_frame_mode == MODE_12k2) {
        lsf2lsp_5(p);
    } else
        lsf2lsp_3(p);

    for (i = 0; i < 4; i++)
        ff_acelp_lspd2lpc(p->lsp[i], p->lpc[i], 5);

    for (subframe = 0; subframe < 4; subframe++) {
        const AMRNBSubframe *amr_subframe = &p->frame.subframe[subframe];

        decode_pitch_vector(p, amr_subframe, subframe);

        decode_fixed_sparse(&fixed_sparse, amr_subframe->pulses,
                            p->cur_frame_mode, subframe);

        // The fixed gain (section 6.1.3) depends on the fixed vector
        // (section 6.1.2), but the fixed vector calculation uses
        // pitch sharpening based on the pitch gain (section 6.1.3).
        // So the correct order is: pitch gain, pitch sharpening, fixed gain.
        decode_gains(p, amr_subframe, p->cur_frame_mode, subframe,
                     &fixed_gain_factor);

        pitch_sharpening(p, subframe, p->cur_frame_mode, &fixed_sparse);

        ff_set_fixed_vector(p->fixed_vector, &fixed_sparse, 1.0,
                            AMR_SUBFRAME_SIZE);

        p->fixed_gain[4] =
            ff_amr_set_fixed_gain(fixed_gain_factor,
                                  ff_dot_productf(p->fixed_vector, p->fixed_vector,
                                                  AMR_SUBFRAME_SIZE) / AMR_SUBFRAME_SIZE,
                                  p->prediction_error,
                                  energy_mean[p->cur_frame_mode], energy_pred_fac);

        // The excitation feedback is calculated without any processing such
        // as fixed gain smoothing. This isn't mentioned in the specification.
        for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
            p->excitation[i] *= p->pitch_gain[4];
        ff_set_fixed_vector(p->excitation, &fixed_sparse, p->fixed_gain[4],
                            AMR_SUBFRAME_SIZE);

        // In the ref decoder, excitation is stored with no fractional bits.
        // This step prevents buzz in silent periods. The ref encoder can
        // emit long sequences with pitch factor greater than one. This
        // creates unwanted feedback if the excitation vector is nonzero.
        // (e.g. test sequence T19_795.COD in 3GPP TS 26.074)
        for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
            p->excitation[i] = truncf(p->excitation[i]);

        // Smooth fixed gain.
        // The specification is ambiguous, but in the reference source, the
        // smoothed value is NOT fed back into later fixed gain smoothing.
        synth_fixed_gain = fixed_gain_smooth(p, p->lsf_q[subframe],
                                             p->lsf_avg, p->cur_frame_mode);

        synth_fixed_vector = anti_sparseness(p, &fixed_sparse, p->fixed_vector,
                                             synth_fixed_gain, spare_vector);

        if (synthesis(p, p->lpc[subframe], synth_fixed_gain,
                      synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 0))
            // overflow detected -> rerun synthesis scaling pitch vector down
            // by a factor of 4, skipping pitch vector contribution emphasis
            // and adaptive gain control
            synthesis(p, p->lpc[subframe], synth_fixed_gain,
                      synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 1);

        postfilter(p, p->lpc[subframe], buf_out + subframe * AMR_SUBFRAME_SIZE);

        // update buffers and history
        ff_clear_fixed_vector(p->fixed_vector, &fixed_sparse, AMR_SUBFRAME_SIZE);
        update_state(p);
    }

    ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros,
                                             highpass_poles,
                                             highpass_gain * AMR_SAMPLE_SCALE,
                                             p->high_pass_mem, AMR_BLOCK_SIZE);

    /* Update averaged lsf vector (used for fixed gain smoothing).
     *
     * Note that lsf_avg should not incorporate the current frame's LSFs
     * for fixed_gain_smooth.
     * The specification has an incorrect formula: the reference decoder uses
     * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71.
     */
    ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
                            0.84, 0.16, LP_FILTER_ORDER);

    /* report how many bytes of decoded samples we wrote */
    *data_size = AMR_BLOCK_SIZE * sizeof(float);

    /* return the amount of bytes consumed if everything was OK */
    return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC
}


AVCodec amrnb_decoder = {
    .name           = "amrnb",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = CODEC_ID_AMR_NB,
    .priv_data_size = sizeof(AMRContext),
    .init           = amrnb_decode_init,
    .decode         = amrnb_decode_frame,
    .long_name      = NULL_IF_CONFIG_SMALL("Adaptive Multi-Rate NarrowBand"),
    .sample_fmts    = (enum SampleFormat[]){ SAMPLE_FMT_FLT, SAMPLE_FMT_NONE },
};
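
/*
 * Minimal usage sketch, not part of the decoder itself: one possible way to
 * drive this codec through the public libavcodec API of this era
 * (avcodec_decode_audio3() and friends). It assumes the caller has already
 * filled an AVPacket with one complete AMR-NB frame, i.e. the 1-byte TOC
 * octet parsed by unpack_bitstream() followed by the packed speech bits, and
 * it relies on the headers already included above. The helper name and the
 * caller-provided "pkt"/"out" buffers are illustrative assumptions; error
 * handling is deliberately minimal.
 */
static int example_decode_one_amrnb_frame(AVPacket *pkt, float *out /* >= 160 samples */)
{
    AVCodec *codec;
    AVCodecContext *ctx;
    uint8_t *outbuf;
    int out_size, ret;

    avcodec_register_all();

    codec = avcodec_find_decoder(CODEC_ID_AMR_NB);
    if (!codec)
        return -1;

    ctx = avcodec_alloc_context();
    if (!ctx || avcodec_open(ctx, codec) < 0)
        return -1;

    /* avcodec_decode_audio3() expects the output buffer size (in bytes) on
     * input; on success this decoder reports AMR_BLOCK_SIZE * sizeof(float)
     * bytes of SAMPLE_FMT_FLT samples. */
    outbuf   = av_malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
    out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
    ret      = avcodec_decode_audio3(ctx, (int16_t *)outbuf, &out_size, pkt);
    if (ret >= 0)
        memcpy(out, outbuf, FFMIN(out_size, AMR_BLOCK_SIZE * (int)sizeof(float)));

    av_free(outbuf);
    avcodec_close(ctx);
    av_free(ctx);

    return ret; // bytes of input consumed, or a negative error code
}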