blob: e5aa4ff364ab128404b99db35e561b988b248f88 [file] [log] [blame]
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include <math.h>
//#include <stdio.h>
#include <stdlib.h>
#include "noise_suppression.h"
#include "ns_core.h"
#include "windows_private.h"
#include "fft4g.h"
#include "signal_processing_library.h"
// Set Feature Extraction Parameters
void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
//bin size of histogram
inst->featureExtractionParams.binSizeLrt = (float)0.1;
inst->featureExtractionParams.binSizeSpecFlat = (float)0.05;
inst->featureExtractionParams.binSizeSpecDiff = (float)0.1;
//range of histogram over which lrt threshold is computed
inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
// thresholds for prior model
inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff
inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness:
// used when noise is flatter than speech
//peak limit for spectral flatness (varies between 0 and 1)
inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
inst->featureExtractionParams.limitPeakSpacingSpecFlat =
2 * inst->featureExtractionParams.binSizeSpecFlat;
inst->featureExtractionParams.limitPeakSpacingSpecDiff =
2 * inst->featureExtractionParams.binSizeSpecDiff;
//limit on relevance of second peak:
inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5;
inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5;
// fluctuation limit of lrt feature
inst->featureExtractionParams.thresFluctLrt = (float)0.05;
//limit on the max and min values for the feature thresholds
inst->featureExtractionParams.maxLrt = (float)1.0;
inst->featureExtractionParams.minLrt = (float)0.20;
inst->featureExtractionParams.maxSpecFlat = (float)0.95;
inst->featureExtractionParams.minSpecFlat = (float)0.10;
inst->featureExtractionParams.maxSpecDiff = (float)1.0;
inst->featureExtractionParams.minSpecDiff = (float)0.16;
//criteria of weight of histogram peak to accept/reject feature
inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3
* (inst->modelUpdatePars[1])); //for spectral flatness
inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3
* (inst->modelUpdatePars[1])); //for spectral difference
}
// Initialize state
int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs) {
int i;
//We only support 10ms frames
//check for valid pointer
if (inst == NULL) {
return -1;
}
// Initialization of struct
if (fs == 8000 || fs == 16000 || fs == 32000) {
inst->fs = fs;
} else {
return -1;
}
inst->windShift = 0;
if (fs == 8000) {
// We only support 10ms frames
inst->blockLen = 80;
inst->blockLen10ms = 80;
inst->anaLen = 128;
inst->window = kBlocks80w128;
inst->outLen = 0;
} else if (fs == 16000) {
// We only support 10ms frames
inst->blockLen = 160;
inst->blockLen10ms = 160;
inst->anaLen = 256;
inst->window = kBlocks160w256;
inst->outLen = 0;
} else if (fs == 32000) {
// We only support 10ms frames
inst->blockLen = 160;
inst->blockLen10ms = 160;
inst->anaLen = 256;
inst->window = kBlocks160w256;
inst->outLen = 0;
}
inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins
// Initialize fft work arrays.
inst->ip[0] = 0; // Setting this triggers initialization.
memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
WebRtc_rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft);
memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
//for HB processing
memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);
//for quantile noise estimation
memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
inst->lquantile[i] = (float)8.0;
inst->density[i] = (float)0.3;
}
for (i = 0; i < SIMULT; i++) {
inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
}
inst->updates = 0;
// Wiener filter initialization
for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
inst->smooth[i] = (float)1.0;
}
// Set the aggressiveness: default
inst->aggrMode = 0;
//initialize variables for new method
inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise
for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
inst->magnPrev[i] = (float)0.0; //previous mag spectrum
inst->noisePrev[i] = (float)0.0; //previous noise-spectrum
inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold)
inst->magnAvgPause[i] = (float)0.0; //conservative noise spectrum estimate
inst->speechProbHB[i] = (float)0.0; //for estimation of HB in second pass
inst->initMagnEst[i] = (float)0.0; //initial average mag spectrum
}
//feature quantities
inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold)
inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version
inst->featureData[2] = (float)0.0; //spectral variance: not used in this version
inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold)
inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold)
inst->featureData[5] = (float)0.0; //normalization for spectral-diff
inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum
//histogram quantities: used to estimate/update thresholds for features
for (i = 0; i < HIST_PAR_EST; i++) {
inst->histLrt[i] = 0;
inst->histSpecFlat[i] = 0;
inst->histSpecDiff[i] = 0;
}
inst->blockInd = -1; //frame counter
inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature
inst->priorModelPars[1] = (float)0.5; //threshold for spectral flatness:
// determined on-line
inst->priorModelPars[2] = (float)1.0; //sgn_map par for spectral measure:
// 1 for flatness measure
inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature:
// determined on-line
inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature
inst->priorModelPars[5] = (float)0.0; //default weighting parameter for
// spectral flatness feature
inst->priorModelPars[6] = (float)0.0; //default weighting parameter for
// spectral difference feature
inst->modelUpdatePars[0] = 2; //update flag for parameters:
// 0 no update, 1=update once, 2=update every window
inst->modelUpdatePars[1] = 500; //window for update
inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum
//counter if the feature thresholds are updated during the sequence
inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
inst->signalEnergy = 0.0;
inst->sumMagn = 0.0;
inst->whiteNoiseLevel = 0.0;
inst->pinkNoiseNumerator = 0.0;
inst->pinkNoiseExp = 0.0;
WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration
//default mode
WebRtcNs_set_policy_core(inst, 0);
memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);
inst->initFlag = 1;
return 0;
}
int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) {
// allow for modes:0,1,2,3
if (mode < 0 || mode > 3) {
return (-1);
}
inst->aggrMode = mode;
if (mode == 0) {
inst->overdrive = (float)1.0;
inst->denoiseBound = (float)0.5;
inst->gainmap = 0;
} else if (mode == 1) {
//inst->overdrive = (float)1.25;
inst->overdrive = (float)1.0;
inst->denoiseBound = (float)0.25;
inst->gainmap = 1;
} else if (mode == 2) {
//inst->overdrive = (float)1.25;
inst->overdrive = (float)1.1;
inst->denoiseBound = (float)0.125;
inst->gainmap = 1;
} else if (mode == 3) {
//inst->overdrive = (float)1.30;
inst->overdrive = (float)1.25;
inst->denoiseBound = (float)0.09;
inst->gainmap = 1;
}
return 0;
}
// Estimate noise
void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
int i, s, offset;
float lmagn[HALF_ANAL_BLOCKL], delta;
if (inst->updates < END_STARTUP_LONG) {
inst->updates++;
}
for (i = 0; i < inst->magnLen; i++) {
lmagn[i] = (float)log(magn[i]);
}
// loop over simultaneous estimates
for (s = 0; s < SIMULT; s++) {
offset = s * inst->magnLen;
// newquantest(...)
for (i = 0; i < inst->magnLen; i++) {
// compute delta
if (inst->density[offset + i] > 1.0) {
delta = FACTOR * (float)1.0 / inst->density[offset + i];
} else {
delta = FACTOR;
}
// update log quantile estimate
if (lmagn[i] > inst->lquantile[offset + i]) {
inst->lquantile[offset + i] += QUANTILE * delta
/ (float)(inst->counter[s] + 1);
} else {
inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta
/ (float)(inst->counter[s] + 1);
}
// update density estimate
if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset
+ i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1);
}
} // end loop over magnitude spectrum
if (inst->counter[s] >= END_STARTUP_LONG) {
inst->counter[s] = 0;
if (inst->updates >= END_STARTUP_LONG) {
for (i = 0; i < inst->magnLen; i++) {
inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
}
}
}
inst->counter[s]++;
} // end loop over simultaneous estimates
// Sequentially update the noise during startup
if (inst->updates < END_STARTUP_LONG) {
// Use the last "s" to get noise during startup that differ from zero.
for (i = 0; i < inst->magnLen; i++) {
inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
}
}
for (i = 0; i < inst->magnLen; i++) {
noise[i] = inst->quantile[i];
}
}
// Extract thresholds for feature parameters
// histograms are computed over some window_size (given by inst->modelUpdatePars[1])
// thresholds and weights are extracted every window
// flag 0 means update histogram only, flag 1 means compute the thresholds/weights
// threshold and weights are returned in: inst->priorModelPars
void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
int maxPeak1, maxPeak2;
int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff;
float binMid, featureSum;
float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;
//3 features: lrt, flatness, difference
//lrt_feature = inst->featureData[3];
//flat_feature = inst->featureData[0];
//diff_feature = inst->featureData[4];
//update histograms
if (flag == 0) {
// LRT
if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt)
&& (inst->featureData[3] >= 0.0)) {
i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt);
inst->histLrt[i]++;
}
// Spectral flatness
if ((inst->featureData[0] < HIST_PAR_EST
* inst->featureExtractionParams.binSizeSpecFlat)
&& (inst->featureData[0] >= 0.0)) {
i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat);
inst->histSpecFlat[i]++;
}
// Spectral difference
if ((inst->featureData[4] < HIST_PAR_EST
* inst->featureExtractionParams.binSizeSpecDiff)
&& (inst->featureData[4] >= 0.0)) {
i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff);
inst->histSpecDiff[i]++;
}
}
// extract parameters for speech/noise probability
if (flag == 1) {
//lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt
avgHistLrt = 0.0;
avgHistLrtCompl = 0.0;
avgSquareHistLrt = 0.0;
numHistLrt = 0;
for (i = 0; i < HIST_PAR_EST; i++) {
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) {
avgHistLrt += inst->histLrt[i] * binMid;
numHistLrt += inst->histLrt[i];
}
avgSquareHistLrt += inst->histLrt[i] * binMid * binMid;
avgHistLrtCompl += inst->histLrt[i] * binMid;
}
if (numHistLrt > 0) {
avgHistLrt = avgHistLrt / ((float)numHistLrt);
}
avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]);
avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]);
fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
// get threshold for lrt feature:
if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) {
//very low fluct, so likely noise
inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
} else {
inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars
* avgHistLrt;
// check if value is within min/max range
if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) {
inst->priorModelPars[0] = inst->featureExtractionParams.minLrt;
}
if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) {
inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
}
}
// done with lrt feature
//
// for spectral flatness and spectral difference: compute the main peaks of histogram
maxPeak1 = 0;
maxPeak2 = 0;
posPeak1SpecFlat = 0.0;
posPeak2SpecFlat = 0.0;
weightPeak1SpecFlat = 0;
weightPeak2SpecFlat = 0;
// peaks for flatness
for (i = 0; i < HIST_PAR_EST; i++) {
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat;
if (inst->histSpecFlat[i] > maxPeak1) {
// Found new "first" peak
maxPeak2 = maxPeak1;
weightPeak2SpecFlat = weightPeak1SpecFlat;
posPeak2SpecFlat = posPeak1SpecFlat;
maxPeak1 = inst->histSpecFlat[i];
weightPeak1SpecFlat = inst->histSpecFlat[i];
posPeak1SpecFlat = binMid;
} else if (inst->histSpecFlat[i] > maxPeak2) {
// Found new "second" peak
maxPeak2 = inst->histSpecFlat[i];
weightPeak2SpecFlat = inst->histSpecFlat[i];
posPeak2SpecFlat = binMid;
}
}
//compute two peaks for spectral difference
maxPeak1 = 0;
maxPeak2 = 0;
posPeak1SpecDiff = 0.0;
posPeak2SpecDiff = 0.0;
weightPeak1SpecDiff = 0;
weightPeak2SpecDiff = 0;
// peaks for spectral difference
for (i = 0; i < HIST_PAR_EST; i++) {
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff;
if (inst->histSpecDiff[i] > maxPeak1) {
// Found new "first" peak
maxPeak2 = maxPeak1;
weightPeak2SpecDiff = weightPeak1SpecDiff;
posPeak2SpecDiff = posPeak1SpecDiff;
maxPeak1 = inst->histSpecDiff[i];
weightPeak1SpecDiff = inst->histSpecDiff[i];
posPeak1SpecDiff = binMid;
} else if (inst->histSpecDiff[i] > maxPeak2) {
// Found new "second" peak
maxPeak2 = inst->histSpecDiff[i];
weightPeak2SpecDiff = inst->histSpecDiff[i];
posPeak2SpecDiff = binMid;
}
}
// for spectrum flatness feature
useFeatureSpecFlat = 1;
// merge the two peaks if they are close
if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat)
< inst->featureExtractionParams.limitPeakSpacingSpecFlat)
&& (weightPeak2SpecFlat
> inst->featureExtractionParams.limitPeakWeightsSpecFlat
* weightPeak1SpecFlat)) {
weightPeak1SpecFlat += weightPeak2SpecFlat;
posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat);
}
//reject if weight of peaks is not large enough, or peak value too small
if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat
|| posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
useFeatureSpecFlat = 0;
}
// if selected, get the threshold
if (useFeatureSpecFlat == 1) {
// compute the threshold
inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars
* posPeak1SpecFlat;
//check if value is within min/max range
if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) {
inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat;
}
if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) {
inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat;
}
}
// done with flatness feature
// for template feature
useFeatureSpecDiff = 1;
// merge the two peaks if they are close
if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff)
< inst->featureExtractionParams.limitPeakSpacingSpecDiff)
&& (weightPeak2SpecDiff
> inst->featureExtractionParams.limitPeakWeightsSpecDiff
* weightPeak1SpecDiff)) {
weightPeak1SpecDiff += weightPeak2SpecDiff;
posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff);
}
// get the threshold value
inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars
* posPeak1SpecDiff;
//reject if weight of peaks is not large enough
if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) {
useFeatureSpecDiff = 0;
}
//check if value is within min/max range
if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) {
inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff;
}
if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) {
inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff;
}
// done with spectral difference feature
// don't use template feature if fluctuation of lrt feature is very low:
// most likely just noise state
if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) {
useFeatureSpecDiff = 0;
}
// select the weights between the features
// inst->priorModelPars[4] is weight for lrt: always selected
// inst->priorModelPars[5] is weight for spectral flatness
// inst->priorModelPars[6] is weight for spectral difference
featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
inst->priorModelPars[4] = (float)1.0 / featureSum;
inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;
// set hists to zero for next update
if (inst->modelUpdatePars[0] >= 1) {
for (i = 0; i < HIST_PAR_EST; i++) {
inst->histLrt[i] = 0;
inst->histSpecFlat[i] = 0;
inst->histSpecDiff[i] = 0;
}
}
} // end of flag == 1
}
// Compute spectral flatness on input spectrum
// magnIn is the magnitude spectrum
// spectral flatness is returned in inst->featureData[0]
void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
int i;
int shiftLP = 1; //option to remove first bin(s) from spectral measures
float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;
// comute spectral measures
// for flatness
avgSpectralFlatnessNum = 0.0;
avgSpectralFlatnessDen = inst->sumMagn;
for (i = 0; i < shiftLP; i++) {
avgSpectralFlatnessDen -= magnIn[i];
}
// compute log of ratio of the geometric to arithmetic mean: check for log(0) case
for (i = shiftLP; i < inst->magnLen; i++) {
if (magnIn[i] > 0.0) {
avgSpectralFlatnessNum += (float)log(magnIn[i]);
} else {
inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0];
return;
}
}
//normalize
avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen;
avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen;
//ratio and inverse log: check for case of log(0)
spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;
//time-avg update of spectral flatness feature
inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]);
// done with flatness feature
}
// Compute the difference measure between input spectrum and a template/learned noise spectrum
// magnIn is the input spectrum
// the reference/template spectrum is inst->magnAvgPause[i]
// returns (normalized) spectral difference in inst->featureData[4]
void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
// avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
int i;
float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
avgPause = 0.0;
avgMagn = inst->sumMagn;
// compute average quantities
for (i = 0; i < inst->magnLen; i++) {
//conservative smooth noise spectrum from pause frames
avgPause += inst->magnAvgPause[i];
}
avgPause = avgPause / ((float)inst->magnLen);
avgMagn = avgMagn / ((float)inst->magnLen);
covMagnPause = 0.0;
varPause = 0.0;
varMagn = 0.0;
// compute variance and covariance quantities
for (i = 0; i < inst->magnLen; i++) {
covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
}
covMagnPause = covMagnPause / ((float)inst->magnLen);
varPause = varPause / ((float)inst->magnLen);
varMagn = varMagn / ((float)inst->magnLen);
// update of average magnitude spectrum
inst->featureData[6] += inst->signalEnergy;
avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
// normalize and compute time-avg update of difference feature
avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
}
// Compute speech/noise probability
// speech/noise probability is returned in: probSpeechFinal
//magn is the input magnitude spectrum
//noise is the noise spectrum
//snrLocPrior is the prior snr for each freq.
//snr loc_post is the post snr for each freq.
void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior,
float* snrLocPost) {
int i, sgnMap;
float invLrt, gainPrior, indPrior;
float logLrtTimeAvgKsum, besselTmp;
float indicator0, indicator1, indicator2;
float tmpFloat1, tmpFloat2;
float weightIndPrior0, weightIndPrior1, weightIndPrior2;
float threshPrior0, threshPrior1, threshPrior2;
float widthPrior, widthPrior0, widthPrior1, widthPrior2;
widthPrior0 = WIDTH_PR_MAP;
widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region:
// lower range, so increase width in tanh map
widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure
//threshold parameters for features
threshPrior0 = inst->priorModelPars[0];
threshPrior1 = inst->priorModelPars[1];
threshPrior2 = inst->priorModelPars[3];
//sign for flatness feature
sgnMap = (int)(inst->priorModelPars[2]);
//weight parameters for features
weightIndPrior0 = inst->priorModelPars[4];
weightIndPrior1 = inst->priorModelPars[5];
weightIndPrior2 = inst->priorModelPars[6];
// compute feature based on average LR factor
// this is the average over all frequencies of the smooth log lrt
logLrtTimeAvgKsum = 0.0;
for (i = 0; i < inst->magnLen; i++) {
tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i];
tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001);
besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2;
inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1)
- inst->logLrtTimeAvg[i]);
logLrtTimeAvgKsum += inst->logLrtTimeAvg[i];
}
logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen);
inst->featureData[3] = logLrtTimeAvgKsum;
// done with computation of LR factor
//
//compute the indicator functions
//
// average lrt feature
widthPrior = widthPrior0;
//use larger width in tanh map for pause regions
if (logLrtTimeAvgKsum < threshPrior0) {
widthPrior = widthPrior1;
}
// compute indicator function: sigmoid map
indicator0 = (float)0.5 * ((float)tanh(widthPrior *
(logLrtTimeAvgKsum - threshPrior0)) + (float)1.0);
//spectral flatness feature
tmpFloat1 = inst->featureData[0];
widthPrior = widthPrior0;
//use larger width in tanh map for pause regions
if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
widthPrior = widthPrior1;
}
if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) {
widthPrior = widthPrior1;
}
// compute indicator function: sigmoid map
indicator1 = (float)0.5 * ((float)tanh((float)sgnMap *
widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0);
//for template spectrum-difference
tmpFloat1 = inst->featureData[4];
widthPrior = widthPrior0;
//use larger width in tanh map for pause regions
if (tmpFloat1 < threshPrior2) {
widthPrior = widthPrior2;
}
// compute indicator function: sigmoid map
indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2))
+ (float)1.0);
//combine the indicator function with the feature weights
indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2
* indicator2;
// done with computing indicator function
//compute the prior probability
inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb);
// make sure probabilities are within range: keep floor to 0.01
if (inst->priorSpeechProb > 1.0) {
inst->priorSpeechProb = (float)1.0;
}
if (inst->priorSpeechProb < 0.01) {
inst->priorSpeechProb = (float)0.01;
}
//final speech probability: combine prior model with LR factor:
gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001);
for (i = 0; i < inst->magnLen; i++) {
invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
invLrt = (float)gainPrior * invLrt;
probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt);
}
}
int WebRtcNs_ProcessCore(NSinst_t* inst,
short* speechFrame,
short* speechFrameHB,
short* outFrame,
short* outFrameHB) {
// main routine for noise reduction
int flagHB = 0;
int i;
const int kStartBand = 5; // Skip first frequency bins during estimation.
int updateParsFlag;
float energy1, energy2, gain, factor, factor1, factor2;
float signalEnergy, sumMagn;
float snrPrior, currentEstimateStsa;
float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech;
float gammaNoiseTmp, gammaNoiseOld;
float noiseUpdateTmp, fTmp, dTmp;
float fin[BLOCKL_MAX], fout[BLOCKL_MAX];
float winData[ANAL_BLOCKL_MAX];
float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
float probSpeechFinal[HALF_ANAL_BLOCKL], previousEstimateStsa[HALF_ANAL_BLOCKL];
float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
// Variables during startup
float sum_log_i = 0.0;
float sum_log_i_square = 0.0;
float sum_log_magn = 0.0;
float sum_log_i_log_magn = 0.0;
float parametric_noise = 0.0;
float parametric_exp = 0.0;
float parametric_num = 0.0;
// SWB variables
int deltaBweHB = 1;
int deltaGainHB = 1;
float decayBweHB = 1.0;
float gainMapParHB = 1.0;
float gainTimeDomainHB = 1.0;
float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
// Check that initiation has been done
if (inst->initFlag != 1) {
return (-1);
}
// Check for valid pointers based on sampling rate
if (inst->fs == 32000) {
if (speechFrameHB == NULL) {
return -1;
}
flagHB = 1;
// range for averaging low band quantities for H band gain
deltaBweHB = (int)inst->magnLen / 4;
deltaGainHB = deltaBweHB;
}
//
updateParsFlag = inst->modelUpdatePars[0];
//
//for LB do all processing
// convert to float
for (i = 0; i < inst->blockLen10ms; i++) {
fin[i] = (float)speechFrame[i];
}
// update analysis buffer for L band
memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms,
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin,
sizeof(float) * inst->blockLen10ms);
if (flagHB == 1) {
// convert to float
for (i = 0; i < inst->blockLen10ms; i++) {
fin[i] = (float)speechFrameHB[i];
}
// update analysis buffer for H band
memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin,
sizeof(float) * inst->blockLen10ms);
}
// check if processing needed
if (inst->outLen == 0) {
// windowing
energy1 = 0.0;
for (i = 0; i < inst->anaLen; i++) {
winData[i] = inst->window[i] * inst->dataBuf[i];
energy1 += winData[i] * winData[i];
}
if (energy1 == 0.0) {
// synthesize the special case of zero input
// we want to avoid updating statistics in this case:
// Updating feature statistics when we have zeros only will cause thresholds to
// move towards zero signal situations. This in turn has the effect that once the
// signal is "turned on" (non-zero values) everything will be treated as speech
// and there is no noise suppression effect. Depending on the duration of the
// inactive signal it takes a considerable amount of time for the system to learn
// what is noise and what is speech.
// read out fully processed segment
for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) {
fout[i - inst->windShift] = inst->syntBuf[i];
}
// update synthesis buffer
memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
sizeof(float) * (inst->anaLen - inst->blockLen));
memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
sizeof(float) * inst->blockLen);
// out buffer
inst->outLen = inst->blockLen - inst->blockLen10ms;
if (inst->blockLen > inst->blockLen10ms) {
for (i = 0; i < inst->outLen; i++) {
inst->outBuf[i] = fout[i + inst->blockLen10ms];
}
}
// convert to short
for (i = 0; i < inst->blockLen10ms; i++) {
dTmp = fout[i];
if (dTmp < WEBRTC_SPL_WORD16_MIN) {
dTmp = WEBRTC_SPL_WORD16_MIN;
} else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
dTmp = WEBRTC_SPL_WORD16_MAX;
}
outFrame[i] = (short)dTmp;
}
// for time-domain gain of HB
if (flagHB == 1) {
for (i = 0; i < inst->blockLen10ms; i++) {
dTmp = inst->dataBufHB[i];
if (dTmp < WEBRTC_SPL_WORD16_MIN) {
dTmp = WEBRTC_SPL_WORD16_MIN;
} else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
dTmp = WEBRTC_SPL_WORD16_MAX;
}
outFrameHB[i] = (short)dTmp;
}
} // end of H band gain computation
//
return 0;
}
//
inst->blockInd++; // Update the block index only when we process a block.
// FFT
WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft);
imag[0] = 0;
real[0] = winData[0];
magn[0] = (float)(fabs(real[0]) + 1.0f);
imag[inst->magnLen - 1] = 0;
real[inst->magnLen - 1] = winData[1];
magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f);
signalEnergy = (float)(real[0] * real[0]) +
(float)(real[inst->magnLen - 1] * real[inst->magnLen - 1]);
sumMagn = magn[0] + magn[inst->magnLen - 1];
if (inst->blockInd < END_STARTUP_SHORT) {
inst->initMagnEst[0] += magn[0];
inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1];
tmpFloat2 = log((float)(inst->magnLen - 1));
sum_log_i = tmpFloat2;
sum_log_i_square = tmpFloat2 * tmpFloat2;
tmpFloat1 = log(magn[inst->magnLen - 1]);
sum_log_magn = tmpFloat1;
sum_log_i_log_magn = tmpFloat2 * tmpFloat1;
}
for (i = 1; i < inst->magnLen - 1; i++) {
real[i] = winData[2 * i];
imag[i] = winData[2 * i + 1];
// magnitude spectrum
fTmp = real[i] * real[i];
fTmp += imag[i] * imag[i];
signalEnergy += fTmp;
magn[i] = ((float)sqrt(fTmp)) + 1.0f;
sumMagn += magn[i];
if (inst->blockInd < END_STARTUP_SHORT) {
inst->initMagnEst[i] += magn[i];
if (i >= kStartBand) {
tmpFloat2 = log((float)i);
sum_log_i += tmpFloat2;
sum_log_i_square += tmpFloat2 * tmpFloat2;
tmpFloat1 = log(magn[i]);
sum_log_magn += tmpFloat1;
sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
}
}
}
signalEnergy = signalEnergy / ((float)inst->magnLen);
inst->signalEnergy = signalEnergy;
inst->sumMagn = sumMagn;
//compute spectral flatness on input spectrum
WebRtcNs_ComputeSpectralFlatness(inst, magn);
// quantile noise estimate
WebRtcNs_NoiseEstimation(inst, magn, noise);
//compute simplified noise model during startup
if (inst->blockInd < END_STARTUP_SHORT) {
// Estimate White noise
inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive;
// Estimate Pink noise parameters
tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand));
tmpFloat1 -= (sum_log_i * sum_log_i);
tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
tmpFloat3 = tmpFloat2 / tmpFloat1;
// Constrain the estimated spectrum to be positive
if (tmpFloat3 < 0.0f) {
tmpFloat3 = 0.0f;
}
inst->pinkNoiseNumerator += tmpFloat3;
tmpFloat2 = (sum_log_i * sum_log_magn);
tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn;
tmpFloat3 = tmpFloat2 / tmpFloat1;
// Constrain the pink noise power to be in the interval [0, 1];
if (tmpFloat3 < 0.0f) {
tmpFloat3 = 0.0f;
}
if (tmpFloat3 > 1.0f) {
tmpFloat3 = 1.0f;
}
inst->pinkNoiseExp += tmpFloat3;
// Calculate frequency independent parts of parametric noise estimate.
if (inst->pinkNoiseExp == 0.0f) {
// Use white noise estimate
parametric_noise = inst->whiteNoiseLevel;
} else {
// Use pink noise estimate
parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
parametric_num *= (float)(inst->blockInd + 1);
parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp);
}
for (i = 0; i < inst->magnLen; i++) {
// Estimate the background noise using the white and pink noise parameters
if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) {
// Use pink noise estimate
parametric_noise = parametric_num / pow((float)i, parametric_exp);
}
theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
// Weight quantile noise with modeled noise
noise[i] *= (inst->blockInd);
tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd);
noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1));
noise[i] /= END_STARTUP_SHORT;
}
}
//compute average signal during END_STARTUP_LONG time:
// used to normalize spectral difference measure
if (inst->blockInd < END_STARTUP_LONG) {
inst->featureData[5] *= inst->blockInd;
inst->featureData[5] += signalEnergy;
inst->featureData[5] /= (inst->blockInd + 1);
}
#ifdef PROCESS_FLOW_0
if (inst->blockInd > END_STARTUP_LONG) {
//option: average the quantile noise: for check with AEC2
for (i = 0; i < inst->magnLen; i++) {
noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i];
}
for (i = 0; i < inst->magnLen; i++) {
// Wiener with over sub-substraction:
theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001);
}
}
#else
//start processing at frames == converged+1
//
// STEP 1: compute prior and post snr based on quantile noise est
//
// compute DD estimate of prior SNR: needed for new method
for (i = 0; i < inst->magnLen; i++) {
// post snr
snrLocPost[i] = (float)0.0;
if (magn[i] > noise[i]) {
snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
}
// previous post snr
// previous estimate: based on previous frame with gain filter
previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001)
* (inst->smooth[i]);
// DD estimate is sum of two terms: current estimate and previous estimate
// directed decision update of snrPrior
snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
* snrLocPost[i];
// post and prior snr needed for step 2
} // end of loop over freqs
#ifdef PROCESS_FLOW_1
for (i = 0; i < inst->magnLen; i++) {
// gain filter
tmpFloat1 = inst->overdrive + snrLocPrior[i];
tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1;
theFilter[i] = (float)tmpFloat2;
} // end of loop over freqs
#endif
// done with step 1: dd computation of prior and post snr
//
//STEP 2: compute speech/noise likelihood
//
#ifdef PROCESS_FLOW_2
// compute difference of input spectrum with learned/estimated noise spectrum
WebRtcNs_ComputeSpectralDifference(inst, magn);
// compute histograms for parameter decisions (thresholds and weights for features)
// parameters are extracted once every window time (=inst->modelUpdatePars[1])
if (updateParsFlag >= 1) {
// counter update
inst->modelUpdatePars[3]--;
// update histogram
if (inst->modelUpdatePars[3] > 0) {
WebRtcNs_FeatureParameterExtraction(inst, 0);
}
// compute model parameters
if (inst->modelUpdatePars[3] == 0) {
WebRtcNs_FeatureParameterExtraction(inst, 1);
inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
// if wish to update only once, set flag to zero
if (updateParsFlag == 1) {
inst->modelUpdatePars[0] = 0;
} else {
// update every window:
// get normalization for spectral difference for next window estimate
inst->featureData[6] = inst->featureData[6]
/ ((float)inst->modelUpdatePars[1]);
inst->featureData[5] = (float)0.5 * (inst->featureData[6]
+ inst->featureData[5]);
inst->featureData[6] = (float)0.0;
}
}
}
// compute speech/noise probability
WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost);
// time-avg parameter for noise update
gammaNoiseTmp = NOISE_UPDATE;
for (i = 0; i < inst->magnLen; i++) {
probSpeech = probSpeechFinal[i];
probNonSpeech = (float)1.0 - probSpeech;
// temporary noise update:
// use it for speech frames if update value is less than previous
noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
* (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
//
// time-constant based on speech/noise state
gammaNoiseOld = gammaNoiseTmp;
gammaNoiseTmp = NOISE_UPDATE;
// increase gamma (i.e., less noise update) for frame likely to be speech
if (probSpeech > PROB_RANGE) {
gammaNoiseTmp = SPEECH_UPDATE;
}
// conservative noise update
if (probSpeech < PROB_RANGE) {
inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
}
// noise update
if (gammaNoiseTmp == gammaNoiseOld) {
noise[i] = noiseUpdateTmp;
} else {
noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
* (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
// allow for noise update downwards:
// if noise update decreases the noise, it is safe, so allow it to happen
if (noiseUpdateTmp < noise[i]) {
noise[i] = noiseUpdateTmp;
}
}
} // end of freq loop
// done with step 2: noise update
//
// STEP 3: compute dd update of prior snr and post snr based on new noise estimate
//
for (i = 0; i < inst->magnLen; i++) {
// post and prior snr
currentEstimateStsa = (float)0.0;
if (magn[i] > noise[i]) {
currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
}
// DD estimate is sume of two terms: current estimate and previous estimate
// directed decision update of snrPrior
snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
* currentEstimateStsa;
// gain filter
tmpFloat1 = inst->overdrive + snrPrior;
tmpFloat2 = (float)snrPrior / tmpFloat1;
theFilter[i] = (float)tmpFloat2;
} // end of loop over freqs
// done with step3
#endif
#endif
for (i = 0; i < inst->magnLen; i++) {
// flooring bottom
if (theFilter[i] < inst->denoiseBound) {
theFilter[i] = inst->denoiseBound;
}
// flooring top
if (theFilter[i] > (float)1.0) {
theFilter[i] = 1.0;
}
if (inst->blockInd < END_STARTUP_SHORT) {
// flooring bottom
if (theFilterTmp[i] < inst->denoiseBound) {
theFilterTmp[i] = inst->denoiseBound;
}
// flooring top
if (theFilterTmp[i] > (float)1.0) {
theFilterTmp[i] = 1.0;
}
// Weight the two suppression filters
theFilter[i] *= (inst->blockInd);
theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd);
theFilter[i] += theFilterTmp[i];
theFilter[i] /= (END_STARTUP_SHORT);
}
// smoothing
#ifdef PROCESS_FLOW_0
inst->smooth[i] *= SMOOTH; // value set to 0.7 in define.h file
inst->smooth[i] += ((float)1.0 - SMOOTH) * theFilter[i];
#else
inst->smooth[i] = theFilter[i];
#endif
real[i] *= inst->smooth[i];
imag[i] *= inst->smooth[i];
}
// keep track of noise and magn spectrum for next frame
for (i = 0; i < inst->magnLen; i++) {
inst->noisePrev[i] = noise[i];
inst->magnPrev[i] = magn[i];
}
// back to time domain
winData[0] = real[0];
winData[1] = real[inst->magnLen - 1];
for (i = 1; i < inst->magnLen - 1; i++) {
winData[2 * i] = real[i];
winData[2 * i + 1] = imag[i];
}
WebRtc_rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft);
for (i = 0; i < inst->anaLen; i++) {
real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling
}
//scale factor: only do it after END_STARTUP_LONG time
factor = (float)1.0;
if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) {
factor1 = (float)1.0;
factor2 = (float)1.0;
energy2 = 0.0;
for (i = 0; i < inst->anaLen; i++) {
energy2 += (float)real[i] * (float)real[i];
}
gain = (float)sqrt(energy2 / (energy1 + (float)1.0));
#ifdef PROCESS_FLOW_2
// scaling for new version
if (gain > B_LIM) {
factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM);
if (gain * factor1 > (float)1.0) {
factor1 = (float)1.0 / gain;
}
}
if (gain < B_LIM) {
//don't reduce scale too much for pause regions:
// attenuation here should be controlled by flooring
if (gain <= inst->denoiseBound) {
gain = inst->denoiseBound;
}
factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain);
}
//combine both scales with speech/noise prob:
// note prior (priorSpeechProb) is not frequency dependent
factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb)
* factor2;
#else
if (gain > B_LIM) {
factor = (float)1.0 + (float)1.3 * (gain - B_LIM);
} else {
factor = (float)1.0 + (float)2.0 * (gain - B_LIM);
}
if (gain * factor > (float)1.0) {
factor = (float)1.0 / gain;
}
#endif
} // out of inst->gainmap==1
// synthesis
for (i = 0; i < inst->anaLen; i++) {
inst->syntBuf[i] += factor * inst->window[i] * (float)real[i];
}
// read out fully processed segment
for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) {
fout[i - inst->windShift] = inst->syntBuf[i];
}
// update synthesis buffer
memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
sizeof(float) * (inst->anaLen - inst->blockLen));
memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
sizeof(float) * inst->blockLen);
// out buffer
inst->outLen = inst->blockLen - inst->blockLen10ms;
if (inst->blockLen > inst->blockLen10ms) {
for (i = 0; i < inst->outLen; i++) {
inst->outBuf[i] = fout[i + inst->blockLen10ms];
}
}
} // end of if out.len==0
else {
for (i = 0; i < inst->blockLen10ms; i++) {
fout[i] = inst->outBuf[i];
}
memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms,
sizeof(float) * (inst->outLen - inst->blockLen10ms));
memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0,
sizeof(float) * inst->blockLen10ms);
inst->outLen -= inst->blockLen10ms;
}
// convert to short
for (i = 0; i < inst->blockLen10ms; i++) {
dTmp = fout[i];
if (dTmp < WEBRTC_SPL_WORD16_MIN) {
dTmp = WEBRTC_SPL_WORD16_MIN;
} else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
dTmp = WEBRTC_SPL_WORD16_MAX;
}
outFrame[i] = (short)dTmp;
}
// for time-domain gain of HB
if (flagHB == 1) {
for (i = 0; i < inst->magnLen; i++) {
inst->speechProbHB[i] = probSpeechFinal[i];
}
// average speech prob from low band
// avg over second half (i.e., 4->8kHz) of freq. spectrum
avgProbSpeechHB = 0.0;
for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) {
avgProbSpeechHB += inst->speechProbHB[i];
}
avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
// average filter gain from low band
// average over second half (i.e., 4->8kHz) of freq. spectrum
avgFilterGainHB = 0.0;
for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) {
avgFilterGainHB += inst->smooth[i];
}
avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0;
// gain based on speech prob:
gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
//combine gain with low band gain
gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB;
if (avgProbSpeechHB >= (float)0.5) {
gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
}
gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
//make sure gain is within flooring range
// flooring bottom
if (gainTimeDomainHB < inst->denoiseBound) {
gainTimeDomainHB = inst->denoiseBound;
}
// flooring top
if (gainTimeDomainHB > (float)1.0) {
gainTimeDomainHB = 1.0;
}
//apply gain
for (i = 0; i < inst->blockLen10ms; i++) {
dTmp = gainTimeDomainHB * inst->dataBufHB[i];
if (dTmp < WEBRTC_SPL_WORD16_MIN) {
dTmp = WEBRTC_SPL_WORD16_MIN;
} else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
dTmp = WEBRTC_SPL_WORD16_MAX;
}
outFrameHB[i] = (short)dTmp;
}
} // end of H band gain computation
//
return 0;
}