add ns code.
This commit is contained in:
parent
43166b5474
commit
35bf68338f
@ -1,5 +1,5 @@
|
||||
#include "IoContext.h"
|
||||
#include "RKAP_3A.h"
|
||||
// #include "RKAP_3A.h"
|
||||
#include "SpeexDsp.h"
|
||||
#include "WebRtcAecm.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
@ -18,7 +18,7 @@ public:
|
||||
farendBuffer = std::make_unique<webrtc::AudioBuffer>(sampleRate, channels, sampleRate, channels, sampleRate, channels);
|
||||
linearOutputBuffer = std::make_unique<webrtc::AudioBuffer>(sampleRate, channels, sampleRate, channels, sampleRate, channels);
|
||||
|
||||
RKAP_3A_Init(&m_vqe, AEC_TX_TYPE);
|
||||
// RKAP_3A_Init(&m_vqe, AEC_TX_TYPE);
|
||||
}
|
||||
|
||||
std::unique_ptr<webrtc::EchoControl> echoCanceller;
|
||||
@ -26,7 +26,7 @@ public:
|
||||
std::unique_ptr<webrtc::AudioBuffer> farendBuffer;
|
||||
std::unique_ptr<webrtc::AudioBuffer> linearOutputBuffer;
|
||||
|
||||
RKAP_AEC_State m_vqe;
|
||||
// RKAP_AEC_State m_vqe;
|
||||
};
|
||||
|
||||
ProcessFileTask::ProcessFileTask() : m_d{new ProcessFileTaskPrivate()} {
|
||||
|
@ -55,6 +55,7 @@ add_library(VocieProcess
|
||||
|
||||
common_audio/third_party/ooura/fft_size_128/ooura_fft.h common_audio/third_party/ooura/fft_size_128/ooura_fft_neon.cc
|
||||
common_audio/third_party/ooura/fft_size_128/ooura_fft.cc
|
||||
common_audio/third_party/ooura/fft_size_256/fft4g.h common_audio/third_party/ooura/fft_size_256/fft4g.cc
|
||||
common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
|
||||
|
||||
rtc_base/checks.h rtc_base/checks.cc
|
||||
@ -147,6 +148,20 @@ add_library(VocieProcess
|
||||
|
||||
modules/audio_processing/logging/apm_data_dumper.h modules/audio_processing/logging/apm_data_dumper.cc
|
||||
|
||||
modules/audio_processing/ns/fast_math.h modules/audio_processing/ns/fast_math.cc
|
||||
modules/audio_processing/ns/histograms.h modules/audio_processing/ns/histograms.cc
|
||||
modules/audio_processing/ns/noise_estimator.h modules/audio_processing/ns/noise_estimator.cc
|
||||
modules/audio_processing/ns/noise_suppressor.h modules/audio_processing/ns/noise_suppressor.cc
|
||||
modules/audio_processing/ns/ns_fft.h modules/audio_processing/ns/ns_fft.cc
|
||||
modules/audio_processing/ns/prior_signal_model_estimator.h modules/audio_processing/ns/prior_signal_model_estimator.cc
|
||||
modules/audio_processing/ns/prior_signal_model.h modules/audio_processing/ns/prior_signal_model.cc
|
||||
modules/audio_processing/ns/quantile_noise_estimator.h modules/audio_processing/ns/quantile_noise_estimator.cc
|
||||
modules/audio_processing/ns/signal_model_estimator.h modules/audio_processing/ns/signal_model_estimator.cc
|
||||
modules/audio_processing/ns/signal_model.h modules/audio_processing/ns/signal_model.cc
|
||||
modules/audio_processing/ns/speech_probability_estimator.h modules/audio_processing/ns/speech_probability_estimator.cc
|
||||
modules/audio_processing/ns/suppression_params.h modules/audio_processing/ns/suppression_params.cc
|
||||
modules/audio_processing/ns/wiener_filter.h modules/audio_processing/ns/wiener_filter.cc
|
||||
|
||||
modules/audio_processing/utility/cascaded_biquad_filter.h modules/audio_processing/utility/cascaded_biquad_filter.cc
|
||||
modules/audio_processing/utility/delay_estimator_wrapper.h modules/audio_processing/utility/delay_estimator_wrapper.cc
|
||||
modules/audio_processing/utility/delay_estimator.h modules/audio_processing/utility/delay_estimator.cc
|
||||
|
866
VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.cc
vendored
Normal file
866
VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.cc
vendored
Normal file
@ -0,0 +1,866 @@
|
||||
/*
|
||||
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
|
||||
* Copyright Takuya OOURA, 1996-2001
|
||||
*
|
||||
* You may use, copy, modify and distribute this code for any purpose (include
|
||||
* commercial use) and without fee. Please refer to this package when you modify
|
||||
* this code.
|
||||
*
|
||||
* Changes:
|
||||
* Trivial type modifications by the WebRTC authors.
|
||||
*/
|
||||
|
||||
/*
|
||||
Fast Fourier/Cosine/Sine Transform
|
||||
dimension :one
|
||||
data length :power of 2
|
||||
decimation :frequency
|
||||
radix :4, 2
|
||||
data :inplace
|
||||
table :use
|
||||
functions
|
||||
cdft: Complex Discrete Fourier Transform
|
||||
rdft: Real Discrete Fourier Transform
|
||||
ddct: Discrete Cosine Transform
|
||||
ddst: Discrete Sine Transform
|
||||
dfct: Cosine Transform of RDFT (Real Symmetric DFT)
|
||||
dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
|
||||
function prototypes
|
||||
void cdft(int, int, float *, int *, float *);
|
||||
void rdft(size_t, int, float *, size_t *, float *);
|
||||
void ddct(int, int, float *, int *, float *);
|
||||
void ddst(int, int, float *, int *, float *);
|
||||
void dfct(int, float *, float *, int *, float *);
|
||||
void dfst(int, float *, float *, int *, float *);
|
||||
|
||||
|
||||
-------- Complex DFT (Discrete Fourier Transform) --------
|
||||
[definition]
|
||||
<case1>
|
||||
X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
|
||||
<case2>
|
||||
X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
|
||||
(notes: sum_j=0^n-1 is a summation from j=0 to n-1)
|
||||
[usage]
|
||||
<case1>
|
||||
ip[0] = 0; // first time only
|
||||
cdft(2*n, 1, a, ip, w);
|
||||
<case2>
|
||||
ip[0] = 0; // first time only
|
||||
cdft(2*n, -1, a, ip, w);
|
||||
[parameters]
|
||||
2*n :data length (int)
|
||||
n >= 1, n = power of 2
|
||||
a[0...2*n-1] :input/output data (float *)
|
||||
input data
|
||||
a[2*j] = Re(x[j]),
|
||||
a[2*j+1] = Im(x[j]), 0<=j<n
|
||||
output data
|
||||
a[2*k] = Re(X[k]),
|
||||
a[2*k+1] = Im(X[k]), 0<=k<n
|
||||
ip[0...*] :work area for bit reversal (int *)
|
||||
length of ip >= 2+sqrt(n)
|
||||
strictly,
|
||||
length of ip >=
|
||||
2+(1<<(int)(log(n+0.5)/log(2))/2).
|
||||
ip[0],ip[1] are pointers of the cos/sin table.
|
||||
w[0...n/2-1] :cos/sin table (float *)
|
||||
w[],ip[] are initialized if ip[0] == 0.
|
||||
[remark]
|
||||
Inverse of
|
||||
cdft(2*n, -1, a, ip, w);
|
||||
is
|
||||
cdft(2*n, 1, a, ip, w);
|
||||
for (j = 0; j <= 2 * n - 1; j++) {
|
||||
a[j] *= 1.0 / n;
|
||||
}
|
||||
.
|
||||
|
||||
|
||||
-------- Real DFT / Inverse of Real DFT --------
|
||||
[definition]
|
||||
<case1> RDFT
|
||||
R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
|
||||
I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
|
||||
<case2> IRDFT (excluding scale)
|
||||
a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
|
||||
sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
|
||||
sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
|
||||
[usage]
|
||||
<case1>
|
||||
ip[0] = 0; // first time only
|
||||
rdft(n, 1, a, ip, w);
|
||||
<case2>
|
||||
ip[0] = 0; // first time only
|
||||
rdft(n, -1, a, ip, w);
|
||||
[parameters]
|
||||
n :data length (size_t)
|
||||
n >= 2, n = power of 2
|
||||
a[0...n-1] :input/output data (float *)
|
||||
<case1>
|
||||
output data
|
||||
a[2*k] = R[k], 0<=k<n/2
|
||||
a[2*k+1] = I[k], 0<k<n/2
|
||||
a[1] = R[n/2]
|
||||
<case2>
|
||||
input data
|
||||
a[2*j] = R[j], 0<=j<n/2
|
||||
a[2*j+1] = I[j], 0<j<n/2
|
||||
a[1] = R[n/2]
|
||||
ip[0...*] :work area for bit reversal (size_t *)
|
||||
length of ip >= 2+sqrt(n/2)
|
||||
strictly,
|
||||
length of ip >=
|
||||
2+(1<<(int)(log(n/2+0.5)/log(2))/2).
|
||||
ip[0],ip[1] are pointers of the cos/sin table.
|
||||
w[0...n/2-1] :cos/sin table (float *)
|
||||
w[],ip[] are initialized if ip[0] == 0.
|
||||
[remark]
|
||||
Inverse of
|
||||
rdft(n, 1, a, ip, w);
|
||||
is
|
||||
rdft(n, -1, a, ip, w);
|
||||
for (j = 0; j <= n - 1; j++) {
|
||||
a[j] *= 2.0 / n;
|
||||
}
|
||||
.
|
||||
|
||||
|
||||
-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
|
||||
[definition]
|
||||
<case1> IDCT (excluding scale)
|
||||
C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
|
||||
<case2> DCT
|
||||
C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
|
||||
[usage]
|
||||
<case1>
|
||||
ip[0] = 0; // first time only
|
||||
ddct(n, 1, a, ip, w);
|
||||
<case2>
|
||||
ip[0] = 0; // first time only
|
||||
ddct(n, -1, a, ip, w);
|
||||
[parameters]
|
||||
n :data length (int)
|
||||
n >= 2, n = power of 2
|
||||
a[0...n-1] :input/output data (float *)
|
||||
output data
|
||||
a[k] = C[k], 0<=k<n
|
||||
ip[0...*] :work area for bit reversal (int *)
|
||||
length of ip >= 2+sqrt(n/2)
|
||||
strictly,
|
||||
length of ip >=
|
||||
2+(1<<(int)(log(n/2+0.5)/log(2))/2).
|
||||
ip[0],ip[1] are pointers of the cos/sin table.
|
||||
w[0...n*5/4-1] :cos/sin table (float *)
|
||||
w[],ip[] are initialized if ip[0] == 0.
|
||||
[remark]
|
||||
Inverse of
|
||||
ddct(n, -1, a, ip, w);
|
||||
is
|
||||
a[0] *= 0.5;
|
||||
ddct(n, 1, a, ip, w);
|
||||
for (j = 0; j <= n - 1; j++) {
|
||||
a[j] *= 2.0 / n;
|
||||
}
|
||||
.
|
||||
|
||||
|
||||
-------- DST (Discrete Sine Transform) / Inverse of DST --------
|
||||
[definition]
|
||||
<case1> IDST (excluding scale)
|
||||
S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
|
||||
<case2> DST
|
||||
S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
|
||||
[usage]
|
||||
<case1>
|
||||
ip[0] = 0; // first time only
|
||||
ddst(n, 1, a, ip, w);
|
||||
<case2>
|
||||
ip[0] = 0; // first time only
|
||||
ddst(n, -1, a, ip, w);
|
||||
[parameters]
|
||||
n :data length (int)
|
||||
n >= 2, n = power of 2
|
||||
a[0...n-1] :input/output data (float *)
|
||||
<case1>
|
||||
input data
|
||||
a[j] = A[j], 0<j<n
|
||||
a[0] = A[n]
|
||||
output data
|
||||
a[k] = S[k], 0<=k<n
|
||||
<case2>
|
||||
output data
|
||||
a[k] = S[k], 0<k<n
|
||||
a[0] = S[n]
|
||||
ip[0...*] :work area for bit reversal (int *)
|
||||
length of ip >= 2+sqrt(n/2)
|
||||
strictly,
|
||||
length of ip >=
|
||||
2+(1<<(int)(log(n/2+0.5)/log(2))/2).
|
||||
ip[0],ip[1] are pointers of the cos/sin table.
|
||||
w[0...n*5/4-1] :cos/sin table (float *)
|
||||
w[],ip[] are initialized if ip[0] == 0.
|
||||
[remark]
|
||||
Inverse of
|
||||
ddst(n, -1, a, ip, w);
|
||||
is
|
||||
a[0] *= 0.5;
|
||||
ddst(n, 1, a, ip, w);
|
||||
for (j = 0; j <= n - 1; j++) {
|
||||
a[j] *= 2.0 / n;
|
||||
}
|
||||
.
|
||||
|
||||
|
||||
-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
|
||||
[definition]
|
||||
C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
|
||||
[usage]
|
||||
ip[0] = 0; // first time only
|
||||
dfct(n, a, t, ip, w);
|
||||
[parameters]
|
||||
n :data length - 1 (int)
|
||||
n >= 2, n = power of 2
|
||||
a[0...n] :input/output data (float *)
|
||||
output data
|
||||
a[k] = C[k], 0<=k<=n
|
||||
t[0...n/2] :work area (float *)
|
||||
ip[0...*] :work area for bit reversal (int *)
|
||||
length of ip >= 2+sqrt(n/4)
|
||||
strictly,
|
||||
length of ip >=
|
||||
2+(1<<(int)(log(n/4+0.5)/log(2))/2).
|
||||
ip[0],ip[1] are pointers of the cos/sin table.
|
||||
w[0...n*5/8-1] :cos/sin table (float *)
|
||||
w[],ip[] are initialized if ip[0] == 0.
|
||||
[remark]
|
||||
Inverse of
|
||||
a[0] *= 0.5;
|
||||
a[n] *= 0.5;
|
||||
dfct(n, a, t, ip, w);
|
||||
is
|
||||
a[0] *= 0.5;
|
||||
a[n] *= 0.5;
|
||||
dfct(n, a, t, ip, w);
|
||||
for (j = 0; j <= n; j++) {
|
||||
a[j] *= 2.0 / n;
|
||||
}
|
||||
.
|
||||
|
||||
|
||||
-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
|
||||
[definition]
|
||||
S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
|
||||
[usage]
|
||||
ip[0] = 0; // first time only
|
||||
dfst(n, a, t, ip, w);
|
||||
[parameters]
|
||||
n :data length + 1 (int)
|
||||
n >= 2, n = power of 2
|
||||
a[0...n-1] :input/output data (float *)
|
||||
output data
|
||||
a[k] = S[k], 0<k<n
|
||||
(a[0] is used for work area)
|
||||
t[0...n/2-1] :work area (float *)
|
||||
ip[0...*] :work area for bit reversal (int *)
|
||||
length of ip >= 2+sqrt(n/4)
|
||||
strictly,
|
||||
length of ip >=
|
||||
2+(1<<(int)(log(n/4+0.5)/log(2))/2).
|
||||
ip[0],ip[1] are pointers of the cos/sin table.
|
||||
w[0...n*5/8-1] :cos/sin table (float *)
|
||||
w[],ip[] are initialized if ip[0] == 0.
|
||||
[remark]
|
||||
Inverse of
|
||||
dfst(n, a, t, ip, w);
|
||||
is
|
||||
dfst(n, a, t, ip, w);
|
||||
for (j = 1; j <= n - 1; j++) {
|
||||
a[j] *= 2.0 / n;
|
||||
}
|
||||
.
|
||||
|
||||
|
||||
Appendix :
|
||||
The cos/sin table is recalculated when a larger table is required.
|
||||
w[] and ip[] are compatible with all routines.
|
||||
*/
|
||||
|
||||
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
void makewt(size_t nw, size_t* ip, float* w);
|
||||
void makect(size_t nc, size_t* ip, float* c);
|
||||
void bitrv2(size_t n, size_t* ip, float* a);
|
||||
void cftfsub(size_t n, float* a, float* w);
|
||||
void cftbsub(size_t n, float* a, float* w);
|
||||
void cft1st(size_t n, float* a, float* w);
|
||||
void cftmdl(size_t n, size_t l, float* a, float* w);
|
||||
void rftfsub(size_t n, float* a, size_t nc, float* c);
|
||||
void rftbsub(size_t n, float* a, size_t nc, float* c);
|
||||
|
||||
/* -------- initializing routines -------- */
|
||||
|
||||
void makewt(size_t nw, size_t* ip, float* w) {
|
||||
size_t j, nwh;
|
||||
float delta, x, y;
|
||||
|
||||
ip[0] = nw;
|
||||
ip[1] = 1;
|
||||
if (nw > 2) {
|
||||
nwh = nw >> 1;
|
||||
delta = atanf(1.0f) / nwh;
|
||||
w[0] = 1;
|
||||
w[1] = 0;
|
||||
w[nwh] = (float)cos(delta * nwh);
|
||||
w[nwh + 1] = w[nwh];
|
||||
if (nwh > 2) {
|
||||
for (j = 2; j < nwh; j += 2) {
|
||||
x = (float)cos(delta * j);
|
||||
y = (float)sin(delta * j);
|
||||
w[j] = x;
|
||||
w[j + 1] = y;
|
||||
w[nw - j] = y;
|
||||
w[nw - j + 1] = x;
|
||||
}
|
||||
bitrv2(nw, ip + 2, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fills the table `c` (nc entries) used by rftfsub()/rftbsub() and records
// its size in ip[1].
//
//   nc : number of table entries to generate.
//   ip : work area; only ip[1] is written.
//   c  : output table. c[0] holds cos(pi/4); every other entry is a cosine
//        or sine value scaled by 0.5.
//
// For nc <= 1 only ip[1] is updated.
void makect(size_t nc, size_t* ip, float* c) {
  ip[1] = nc;
  if (nc <= 1) {
    return;
  }

  const size_t half = nc >> 1;
  // atanf(1) is pi/4, so `step * half` equals pi/4.
  const float step = atanf(1.0f) / half;
  c[0] = static_cast<float>(cos(step * half));
  c[half] = 0.5f * c[0];
  for (size_t idx = 1; idx < half; ++idx) {
    c[idx] = 0.5f * static_cast<float>(cos(step * idx));
    c[nc - idx] = 0.5f * static_cast<float>(sin(step * idx));
  }
}
|
||||
|
||||
/* -------- child routines -------- */
|
||||
|
||||
// Permutes the interleaved (re, im) pairs of `a` in place into bit-reversed
// order.
//
//   n  : number of floats in `a` (two per complex value).
//   ip : work area; it is overwritten with the offset table that drives the
//        permutation, starting at ip[0] = 0.
//   a  : data to permute.
void bitrv2(size_t n, size_t* ip, float* a) {
  // Exchanges the complex value at a[p], a[p + 1] with the one at a[q],
  // a[q + 1]. All four floats are read before any of them is written.
  const auto exchange = [a](size_t p, size_t q) {
    const float first_re = a[p];
    const float first_im = a[p + 1];
    const float second_re = a[q];
    const float second_im = a[q + 1];
    a[p] = second_re;
    a[p + 1] = second_im;
    a[q] = first_re;
    a[q + 1] = first_im;
  };

  // Build the offset table, doubling its length on every pass.
  ip[0] = 0;
  size_t span = n;
  size_t count = 1;
  while ((count << 3) < span) {
    span >>= 1;
    for (size_t i = 0; i < count; ++i) {
      ip[count + i] = ip[i] + span;
    }
    count <<= 1;
  }
  const size_t stride = 2 * count;

  if ((count << 3) == span) {
    for (size_t k = 0; k < count; ++k) {
      for (size_t j = 0; j < k; ++j) {
        size_t p = 2 * j + ip[k];
        size_t q = 2 * k + ip[j];
        exchange(p, q);
        p += stride;
        q += 2 * stride;
        exchange(p, q);
        p += stride;
        q -= stride;
        exchange(p, q);
        p += stride;
        q += 2 * stride;
        exchange(p, q);
      }
      // One extra exchange per k that the inner loop does not reach.
      const size_t p = 2 * k + stride + ip[k];
      exchange(p, p + stride);
    }
  } else {
    for (size_t k = 1; k < count; ++k) {
      for (size_t j = 0; j < k; ++j) {
        const size_t p = 2 * j + ip[k];
        const size_t q = 2 * k + ip[j];
        exchange(p, q);
        exchange(p + stride, q + stride);
      }
    }
  }
}
|
||||
|
||||
void cftfsub(size_t n, float* a, float* w) {
|
||||
size_t j, j1, j2, j3, l;
|
||||
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
||||
|
||||
l = 2;
|
||||
if (n > 8) {
|
||||
cft1st(n, a, w);
|
||||
l = 8;
|
||||
while ((l << 2) < n) {
|
||||
cftmdl(n, l, a, w);
|
||||
l <<= 2;
|
||||
}
|
||||
}
|
||||
if ((l << 2) == n) {
|
||||
for (j = 0; j < l; j += 2) {
|
||||
j1 = j + l;
|
||||
j2 = j1 + l;
|
||||
j3 = j2 + l;
|
||||
x0r = a[j] + a[j1];
|
||||
x0i = a[j + 1] + a[j1 + 1];
|
||||
x1r = a[j] - a[j1];
|
||||
x1i = a[j + 1] - a[j1 + 1];
|
||||
x2r = a[j2] + a[j3];
|
||||
x2i = a[j2 + 1] + a[j3 + 1];
|
||||
x3r = a[j2] - a[j3];
|
||||
x3i = a[j2 + 1] - a[j3 + 1];
|
||||
a[j] = x0r + x2r;
|
||||
a[j + 1] = x0i + x2i;
|
||||
a[j2] = x0r - x2r;
|
||||
a[j2 + 1] = x0i - x2i;
|
||||
a[j1] = x1r - x3i;
|
||||
a[j1 + 1] = x1i + x3r;
|
||||
a[j3] = x1r + x3i;
|
||||
a[j3 + 1] = x1i - x3r;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < l; j += 2) {
|
||||
j1 = j + l;
|
||||
x0r = a[j] - a[j1];
|
||||
x0i = a[j + 1] - a[j1 + 1];
|
||||
a[j] += a[j1];
|
||||
a[j + 1] += a[j1 + 1];
|
||||
a[j1] = x0r;
|
||||
a[j1 + 1] = x0i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cftbsub(size_t n, float* a, float* w) {
|
||||
size_t j, j1, j2, j3, l;
|
||||
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
||||
|
||||
l = 2;
|
||||
if (n > 8) {
|
||||
cft1st(n, a, w);
|
||||
l = 8;
|
||||
while ((l << 2) < n) {
|
||||
cftmdl(n, l, a, w);
|
||||
l <<= 2;
|
||||
}
|
||||
}
|
||||
if ((l << 2) == n) {
|
||||
for (j = 0; j < l; j += 2) {
|
||||
j1 = j + l;
|
||||
j2 = j1 + l;
|
||||
j3 = j2 + l;
|
||||
x0r = a[j] + a[j1];
|
||||
x0i = -a[j + 1] - a[j1 + 1];
|
||||
x1r = a[j] - a[j1];
|
||||
x1i = -a[j + 1] + a[j1 + 1];
|
||||
x2r = a[j2] + a[j3];
|
||||
x2i = a[j2 + 1] + a[j3 + 1];
|
||||
x3r = a[j2] - a[j3];
|
||||
x3i = a[j2 + 1] - a[j3 + 1];
|
||||
a[j] = x0r + x2r;
|
||||
a[j + 1] = x0i - x2i;
|
||||
a[j2] = x0r - x2r;
|
||||
a[j2 + 1] = x0i + x2i;
|
||||
a[j1] = x1r - x3i;
|
||||
a[j1 + 1] = x1i - x3r;
|
||||
a[j3] = x1r + x3i;
|
||||
a[j3 + 1] = x1i + x3r;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < l; j += 2) {
|
||||
j1 = j + l;
|
||||
x0r = a[j] - a[j1];
|
||||
x0i = -a[j + 1] + a[j1 + 1];
|
||||
a[j] += a[j1];
|
||||
a[j + 1] = -a[j + 1] - a[j1 + 1];
|
||||
a[j1] = x0r;
|
||||
a[j1 + 1] = x0i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// First radix-4 pass of the complex FFT, applied in place to groups of four
// adjacent complex values (eight floats).
//
//   n : number of floats in `a`. The first 16 floats are always processed;
//       the loop then handles every further 16-float chunk below n.
//   a : interleaved re/im data, transformed in place.
//   w : cos/sin table supplying the twiddle factors.
void cft1st(size_t n, float* a, float* w) {
  // Pairwise sums and differences of the four complex values that start at
  // a[base]. Everything is read before the caller writes any result back.
  struct Butterfly {
    float sum0r, sum0i, dif0r, dif0i, sum1r, sum1i, dif1r, dif1i;
  };
  const auto gather = [a](size_t base) -> Butterfly {
    Butterfly b;
    b.sum0r = a[base] + a[base + 2];
    b.sum0i = a[base + 1] + a[base + 3];
    b.dif0r = a[base] - a[base + 2];
    b.dif0i = a[base + 1] - a[base + 3];
    b.sum1r = a[base + 4] + a[base + 6];
    b.sum1i = a[base + 5] + a[base + 7];
    b.dif1r = a[base + 4] - a[base + 6];
    b.dif1i = a[base + 5] - a[base + 7];
    return b;
  };

  // a[0..7]: butterfly without twiddle multiplications.
  {
    const Butterfly b = gather(0);
    a[0] = b.sum0r + b.sum1r;
    a[1] = b.sum0i + b.sum1i;
    a[4] = b.sum0r - b.sum1r;
    a[5] = b.sum0i - b.sum1i;
    a[2] = b.dif0r - b.dif1i;
    a[3] = b.dif0i + b.dif1r;
    a[6] = b.dif0r + b.dif1i;
    a[7] = b.dif0i - b.dif1r;
  }

  // a[8..15]: only the single factor w[2] is needed.
  {
    const float wr = w[2];
    const Butterfly b = gather(8);
    a[8] = b.sum0r + b.sum1r;
    a[9] = b.sum0i + b.sum1i;
    a[12] = b.sum1i - b.sum0i;
    a[13] = b.sum0r - b.sum1r;
    float tr = b.dif0r - b.dif1i;
    float ti = b.dif0i + b.dif1r;
    a[10] = wr * (tr - ti);
    a[11] = wr * (tr + ti);
    tr = b.dif1i + b.dif0r;
    ti = b.dif1r - b.dif0i;
    a[14] = wr * (ti - tr);
    a[15] = wr * (ti + tr);
  }

  // Remaining chunks: two butterflies per 16 floats, each with three
  // twiddle factors read or derived from `w`.
  size_t k1 = 0;
  for (size_t j = 16; j < n; j += 16) {
    k1 += 2;
    const size_t k2 = 2 * k1;
    const float wk2r = w[k1];
    const float wk2i = w[k1 + 1];
    float wk1r = w[k2];
    float wk1i = w[k2 + 1];
    float wk3r = wk1r - 2 * wk2i * wk1i;
    float wk3i = 2 * wk2i * wk1r - wk1i;

    // First butterfly of the chunk: a[j .. j + 7].
    Butterfly b = gather(j);
    a[j] = b.sum0r + b.sum1r;
    a[j + 1] = b.sum0i + b.sum1i;
    float tr = b.sum0r - b.sum1r;
    float ti = b.sum0i - b.sum1i;
    a[j + 4] = wk2r * tr - wk2i * ti;
    a[j + 5] = wk2r * ti + wk2i * tr;
    tr = b.dif0r - b.dif1i;
    ti = b.dif0i + b.dif1r;
    a[j + 2] = wk1r * tr - wk1i * ti;
    a[j + 3] = wk1r * ti + wk1i * tr;
    tr = b.dif0r + b.dif1i;
    ti = b.dif0i - b.dif1r;
    a[j + 6] = wk3r * tr - wk3i * ti;
    a[j + 7] = wk3r * ti + wk3i * tr;

    // Second butterfly of the chunk: a[j + 8 .. j + 15].
    wk1r = w[k2 + 2];
    wk1i = w[k2 + 3];
    wk3r = wk1r - 2 * wk2r * wk1i;
    wk3i = 2 * wk2r * wk1r - wk1i;
    b = gather(j + 8);
    a[j + 8] = b.sum0r + b.sum1r;
    a[j + 9] = b.sum0i + b.sum1i;
    tr = b.sum0r - b.sum1r;
    ti = b.sum0i - b.sum1i;
    a[j + 12] = -wk2i * tr - wk2r * ti;
    a[j + 13] = -wk2i * ti + wk2r * tr;
    tr = b.dif0r - b.dif1i;
    ti = b.dif0i + b.dif1r;
    a[j + 10] = wk1r * tr - wk1i * ti;
    a[j + 11] = wk1r * ti + wk1i * tr;
    tr = b.dif0r + b.dif1i;
    ti = b.dif0i - b.dif1r;
    a[j + 14] = wk3r * tr - wk3i * ti;
    a[j + 15] = wk3r * ti + wk3i * tr;
  }
}
|
||||
|
||||
// Middle radix-4 pass of the complex FFT: combines four points spaced `l`
// floats apart, block by block, in place.
//
//   n : number of floats in `a`.
//   l : distance in floats between the four points of one butterfly.
//   a : interleaved re/im data, transformed in place.
//   w : cos/sin table supplying the twiddle factors.
//
// The first block (offset 0) needs no twiddle multiplication, the second
// (offset m = 4 * l) needs only w[2], and every later pair of blocks uses
// three factors read or derived from `w`.
void cftmdl(size_t n, size_t l, float* a, float* w) {
  // Pairwise sums and differences of the four complex values at p0..p3.
  // Everything is read before the caller writes any result back.
  struct Butterfly {
    float sum0r, sum0i, dif0r, dif0i, sum1r, sum1i, dif1r, dif1i;
  };
  const auto gather = [a](size_t p0, size_t p1, size_t p2,
                          size_t p3) -> Butterfly {
    Butterfly b;
    b.sum0r = a[p0] + a[p1];
    b.sum0i = a[p0 + 1] + a[p1 + 1];
    b.dif0r = a[p0] - a[p1];
    b.dif0i = a[p0 + 1] - a[p1 + 1];
    b.sum1r = a[p2] + a[p3];
    b.sum1i = a[p2 + 1] + a[p3 + 1];
    b.dif1r = a[p2] - a[p3];
    b.dif1i = a[p2 + 1] - a[p3 + 1];
    return b;
  };

  const size_t m = l << 2;

  // Block at offset 0: no twiddle factors.
  for (size_t j = 0; j < l; j += 2) {
    const size_t j1 = j + l;
    const size_t j2 = j1 + l;
    const size_t j3 = j2 + l;
    const Butterfly b = gather(j, j1, j2, j3);
    a[j] = b.sum0r + b.sum1r;
    a[j + 1] = b.sum0i + b.sum1i;
    a[j2] = b.sum0r - b.sum1r;
    a[j2 + 1] = b.sum0i - b.sum1i;
    a[j1] = b.dif0r - b.dif1i;
    a[j1 + 1] = b.dif0i + b.dif1r;
    a[j3] = b.dif0r + b.dif1i;
    a[j3 + 1] = b.dif0i - b.dif1r;
  }

  // Block at offset m: only the single factor w[2] is needed.
  const float w2 = w[2];
  for (size_t j = m; j < l + m; j += 2) {
    const size_t j1 = j + l;
    const size_t j2 = j1 + l;
    const size_t j3 = j2 + l;
    const Butterfly b = gather(j, j1, j2, j3);
    a[j] = b.sum0r + b.sum1r;
    a[j + 1] = b.sum0i + b.sum1i;
    a[j2] = b.sum1i - b.sum0i;
    a[j2 + 1] = b.sum0r - b.sum1r;
    float tr = b.dif0r - b.dif1i;
    float ti = b.dif0i + b.dif1r;
    a[j1] = w2 * (tr - ti);
    a[j1 + 1] = w2 * (tr + ti);
    tr = b.dif1i + b.dif0r;
    ti = b.dif1r - b.dif0i;
    a[j3] = w2 * (ti - tr);
    a[j3 + 1] = w2 * (ti + tr);
  }

  // Remaining blocks, handled two at a time (offsets k and k + m).
  size_t k1 = 0;
  const size_t m2 = 2 * m;
  for (size_t k = m2; k < n; k += m2) {
    k1 += 2;
    const size_t k2 = 2 * k1;
    const float wk2r = w[k1];
    const float wk2i = w[k1 + 1];
    float wk1r = w[k2];
    float wk1i = w[k2 + 1];
    float wk3r = wk1r - 2 * wk2i * wk1i;
    float wk3i = 2 * wk2i * wk1r - wk1i;
    for (size_t j = k; j < l + k; j += 2) {
      const size_t j1 = j + l;
      const size_t j2 = j1 + l;
      const size_t j3 = j2 + l;
      const Butterfly b = gather(j, j1, j2, j3);
      a[j] = b.sum0r + b.sum1r;
      a[j + 1] = b.sum0i + b.sum1i;
      float tr = b.sum0r - b.sum1r;
      float ti = b.sum0i - b.sum1i;
      a[j2] = wk2r * tr - wk2i * ti;
      a[j2 + 1] = wk2r * ti + wk2i * tr;
      tr = b.dif0r - b.dif1i;
      ti = b.dif0i + b.dif1r;
      a[j1] = wk1r * tr - wk1i * ti;
      a[j1 + 1] = wk1r * ti + wk1i * tr;
      tr = b.dif0r + b.dif1i;
      ti = b.dif0i - b.dif1r;
      a[j3] = wk3r * tr - wk3i * ti;
      a[j3 + 1] = wk3r * ti + wk3i * tr;
    }

    wk1r = w[k2 + 2];
    wk1i = w[k2 + 3];
    wk3r = wk1r - 2 * wk2r * wk1i;
    wk3i = 2 * wk2r * wk1r - wk1i;
    for (size_t j = k + m; j < l + (k + m); j += 2) {
      const size_t j1 = j + l;
      const size_t j2 = j1 + l;
      const size_t j3 = j2 + l;
      const Butterfly b = gather(j, j1, j2, j3);
      a[j] = b.sum0r + b.sum1r;
      a[j + 1] = b.sum0i + b.sum1i;
      float tr = b.sum0r - b.sum1r;
      float ti = b.sum0i - b.sum1i;
      a[j2] = -wk2i * tr - wk2r * ti;
      a[j2 + 1] = -wk2i * ti + wk2r * tr;
      tr = b.dif0r - b.dif1i;
      ti = b.dif0i + b.dif1r;
      a[j1] = wk1r * tr - wk1i * ti;
      a[j1 + 1] = wk1r * ti + wk1i * tr;
      tr = b.dif0r + b.dif1i;
      ti = b.dif0i - b.dif1r;
      a[j3] = wk3r * tr - wk3i * ti;
      a[j3 + 1] = wk3r * ti + wk3i * tr;
    }
  }
}
|
||||
|
||||
// Forward-path real-FFT post-processing step: combines each complex value
// a[j], a[j + 1] with its mirror a[n - j], a[n - j + 1] using weights taken
// from the table `c`.
//
//   n  : number of floats in `a`.
//   a  : data, updated in place. a[0] and a[1] are not touched here.
//   nc : number of entries in `c`.
//   c  : table produced by makect().
void rftfsub(size_t n, float* a, size_t nc, float* c) {
  const size_t half = n >> 1;
  // Step through `c` so that the table spans the loop below.
  const size_t table_step = 2 * nc / half;
  size_t table_pos = 0;
  for (size_t lo = 2; lo < half; lo += 2) {
    const size_t hi = n - lo;
    table_pos += table_step;
    const float wkr = 0.5f - c[nc - table_pos];
    const float wki = c[table_pos];
    const float xr = a[lo] - a[hi];
    const float xi = a[lo + 1] + a[hi + 1];
    const float yr = wkr * xr - wki * xi;
    const float yi = wkr * xi + wki * xr;
    a[lo] -= yr;
    a[lo + 1] -= yi;
    a[hi] += yr;
    a[hi + 1] -= yi;
  }
}
|
||||
|
||||
// Inverse-path counterpart of rftfsub(): combines each complex value
// a[j], a[j + 1] with its mirror a[n - j], a[n - j + 1] using weights taken
// from the table `c`, and flips the sign of the imaginary parts.
//
//   n  : number of floats in `a`.
//   a  : data, updated in place. a[1] and a[n / 2 + 1] are negated; a[0] is
//        not touched.
//   nc : number of entries in `c`.
//   c  : table produced by makect().
void rftbsub(size_t n, float* a, size_t nc, float* c) {
  a[1] = -a[1];
  const size_t half = n >> 1;
  // Step through `c` so that the table spans the loop below.
  const size_t table_step = 2 * nc / half;
  size_t table_pos = 0;
  for (size_t lo = 2; lo < half; lo += 2) {
    const size_t hi = n - lo;
    table_pos += table_step;
    const float wkr = 0.5f - c[nc - table_pos];
    const float wki = c[table_pos];
    const float xr = a[lo] - a[hi];
    const float xi = a[lo + 1] + a[hi + 1];
    const float yr = wkr * xr + wki * xi;
    const float yi = wkr * xi - wki * xr;
    a[lo] -= yr;
    a[lo + 1] = yi - a[lo + 1];
    a[hi] += yr;
    a[hi + 1] = yi - a[hi + 1];
  }
  a[half + 1] = -a[half + 1];
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void WebRtc_rdft(size_t n, int isgn, float* a, size_t* ip, float* w) {
|
||||
size_t nw, nc;
|
||||
float xi;
|
||||
|
||||
nw = ip[0];
|
||||
if (n > (nw << 2)) {
|
||||
nw = n >> 2;
|
||||
makewt(nw, ip, w);
|
||||
}
|
||||
nc = ip[1];
|
||||
if (n > (nc << 2)) {
|
||||
nc = n >> 2;
|
||||
makect(nc, ip, w + nw);
|
||||
}
|
||||
if (isgn >= 0) {
|
||||
if (n > 4) {
|
||||
bitrv2(n, ip + 2, a);
|
||||
cftfsub(n, a, w);
|
||||
rftfsub(n, a, nc, w + nw);
|
||||
} else if (n == 4) {
|
||||
cftfsub(n, a, w);
|
||||
}
|
||||
xi = a[0] - a[1];
|
||||
a[0] += a[1];
|
||||
a[1] = xi;
|
||||
} else {
|
||||
a[1] = 0.5f * (a[0] - a[1]);
|
||||
a[0] -= a[1];
|
||||
if (n > 4) {
|
||||
rftbsub(n, a, nc, w + nw);
|
||||
bitrv2(n, ip + 2, a);
|
||||
cftbsub(n, a, w);
|
||||
} else if (n == 4) {
|
||||
cftfsub(n, a, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
23
VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.h
vendored
Normal file
23
VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.h
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the ../../../LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_
|
||||
#define COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Refer to fft4g.cc for documentation.
|
||||
void WebRtc_rdft(size_t n, int isgn, float* a, size_t* ip, float* w);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_
|
84
VocieProcess/modules/audio_processing/ns/fast_math.cc
Normal file
84
VocieProcess/modules/audio_processing/ns/fast_math.cc
Normal file
@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
|
||||
#include <math.h>
#include <stdint.h>
#include <string.h>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Fast approximation of log2(in) derived from the bit pattern of `in`.
//
// `in` must be strictly positive (checked with RTC_DCHECK_GT). Returns the
// approximate base-2 logarithm.
float FastLog2f(float in) {
  RTC_DCHECK_GT(in, .0f);
  // Copy the bits of the float into a uint32_t and then convert that integer
  // to float. This is done to extract the exponent (bits 30 - 23).
  // memcpy is used for the reinterpretation because reading a union member
  // other than the one last written is undefined behaviour in C++.
  static_assert(sizeof(uint32_t) == sizeof(float),
                "FastLog2f requires a 32-bit float");
  uint32_t bits;
  memcpy(&bits, &in, sizeof(bits));
  float out = static_cast<float>(bits);
  // "Right shift" of the exponent is then performed by multiplying
  // with the constant (1/2^23). Finally, we subtract a constant to
  // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias).
  out *= 1.1920929e-7f;  // 1/2^23
  out -= 126.942695f;    // Remove bias.
  return out;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Square root of `f`. Currently this simply forwards to sqrtf().
float SqrtFastApproximation(float f) {
  // TODO(peah): Add fast approximate implementation.
  const float root = sqrtf(f);
  return root;
}
|
||||
|
||||
// Returns 2 raised to the power `p`. Currently this simply forwards to
// powf().
float Pow2Approximation(float p) {
  // TODO(peah): Add fast approximate implementation.
  const float result = powf(2.f, p);
  return result;
}
|
||||
|
||||
float PowApproximation(float x, float p) {
|
||||
return Pow2Approximation(p * FastLog2f(x));
|
||||
}
|
||||
|
||||
float LogApproximation(float x) {
|
||||
constexpr float kLogOf2 = 0.69314718056f;
|
||||
return FastLog2f(x) * kLogOf2;
|
||||
}
|
||||
|
||||
// Element-wise LogApproximation(): writes the approximate natural logarithm
// of x[i] to y[i] for every index of `x`. `y` is not size-checked here.
void LogApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y) {
  const size_t count = x.size();
  for (size_t i = 0; i != count; ++i) {
    const float value = x[i];
    y[i] = LogApproximation(value);
  }
}
|
||||
|
||||
float ExpApproximation(float x) {
|
||||
constexpr float kLog10Ofe = 0.4342944819f;
|
||||
return PowApproximation(10.f, x * kLog10Ofe);
|
||||
}
|
||||
|
||||
// Element-wise ExpApproximation(): writes the approximation of e^x[i] to
// y[i] for every index of `x`. `y` is not size-checked here.
void ExpApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y) {
  const size_t count = x.size();
  for (size_t i = 0; i != count; ++i) {
    const float value = x[i];
    y[i] = ExpApproximation(value);
  }
}
|
||||
|
||||
// Element-wise ExpApproximation() of the negated input: writes the
// approximation of e^(-x[i]) to y[i] for every index of `x`. `y` is not
// size-checked here.
void ExpApproximationSignFlip(rtc::ArrayView<const float> x,
                              rtc::ArrayView<float> y) {
  const size_t count = x.size();
  for (size_t i = 0; i != count; ++i) {
    const float negated = -x[i];
    y[i] = ExpApproximation(negated);
  }
}
|
||||
|
||||
} // namespace webrtc
|
38
VocieProcess/modules/audio_processing/ns/fast_math.h
Normal file
38
VocieProcess/modules/audio_processing/ns/fast_math.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
|
||||
|
||||
#include "api/array_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Sqrt approximation.
|
||||
float SqrtFastApproximation(float f);
|
||||
|
||||
// Log base conversion log(x) = log2(x)/log2(e).
|
||||
float LogApproximation(float x);
|
||||
void LogApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
|
||||
|
||||
// 2^x approximation.
|
||||
float Pow2Approximation(float p);
|
||||
|
||||
// x^p approximation.
|
||||
float PowApproximation(float x, float p);
|
||||
|
||||
// e^x approximation.
|
||||
float ExpApproximation(float x);
|
||||
void ExpApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
|
||||
void ExpApproximationSignFlip(rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<float> y);
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
|
47
VocieProcess/modules/audio_processing/ns/histograms.cc
Normal file
47
VocieProcess/modules/audio_processing/ns/histograms.cc
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/histograms.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates the histograms with every bin count set to zero.
Histograms::Histograms() {
  Clear();
}
|
||||
|
||||
// Resets the bin counts of all three feature histograms to zero.
void Histograms::Clear() {
  spectral_diff_.fill(0);
  spectral_flatness_.fill(0);
  lrt_.fill(0);
}
|
||||
|
||||
void Histograms::Update(const SignalModel& features_) {
|
||||
// Update the histogram for the LRT.
|
||||
constexpr float kOneByBinSizeLrt = 1.f / kBinSizeLrt;
|
||||
if (features_.lrt < kHistogramSize * kBinSizeLrt && features_.lrt >= 0.f) {
|
||||
++lrt_[kOneByBinSizeLrt * features_.lrt];
|
||||
}
|
||||
|
||||
// Update histogram for the spectral flatness.
|
||||
constexpr float kOneByBinSizeSpecFlat = 1.f / kBinSizeSpecFlat;
|
||||
if (features_.spectral_flatness < kHistogramSize * kBinSizeSpecFlat &&
|
||||
features_.spectral_flatness >= 0.f) {
|
||||
++spectral_flatness_[features_.spectral_flatness * kOneByBinSizeSpecFlat];
|
||||
}
|
||||
|
||||
// Update histogram for the spectral difference.
|
||||
constexpr float kOneByBinSizeSpecDiff = 1.f / kBinSizeSpecDiff;
|
||||
if (features_.spectral_diff < kHistogramSize * kBinSizeSpecDiff &&
|
||||
features_.spectral_diff >= 0.f) {
|
||||
++spectral_diff_[features_.spectral_diff * kOneByBinSizeSpecDiff];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
55
VocieProcess/modules/audio_processing/ns/histograms.h
Normal file
55
VocieProcess/modules/audio_processing/ns/histograms.h
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
#include "modules/audio_processing/ns/signal_model.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
constexpr int kHistogramSize = 1000;
|
||||
|
||||
// Class for handling the updating of histograms.
|
||||
class Histograms {
|
||||
public:
|
||||
Histograms();
|
||||
Histograms(const Histograms&) = delete;
|
||||
Histograms& operator=(const Histograms&) = delete;
|
||||
|
||||
// Clears the histograms.
|
||||
void Clear();
|
||||
|
||||
// Extracts thresholds for feature parameters and updates the corresponding
|
||||
// histogram.
|
||||
void Update(const SignalModel& features_);
|
||||
|
||||
// Methods for accessing the histograms.
|
||||
rtc::ArrayView<const int, kHistogramSize> get_lrt() const { return lrt_; }
|
||||
rtc::ArrayView<const int, kHistogramSize> get_spectral_flatness() const {
|
||||
return spectral_flatness_;
|
||||
}
|
||||
rtc::ArrayView<const int, kHistogramSize> get_spectral_diff() const {
|
||||
return spectral_diff_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<int, kHistogramSize> lrt_;
|
||||
std::array<int, kHistogramSize> spectral_flatness_;
|
||||
std::array<int, kHistogramSize> spectral_diff_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
|
195
VocieProcess/modules/audio_processing/ns/noise_estimator.cc
Normal file
195
VocieProcess/modules/audio_processing/ns/noise_estimator.cc
Normal file
@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/noise_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
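// Natural logarithm of the index i. Entries 0-4 are stored as zero; PreUpdate
// only reads the table from index 5 upwards.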
|
||||
constexpr std::array<float, 129> log_table = {
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f,
|
||||
1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f,
|
||||
2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f,
|
||||
3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f,
|
||||
3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f,
|
||||
3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f,
|
||||
3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f,
|
||||
3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f,
|
||||
4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f,
|
||||
4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f,
|
||||
4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f,
|
||||
4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f,
|
||||
4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f,
|
||||
4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f,
|
||||
4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f,
|
||||
4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f,
|
||||
4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f,
|
||||
4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f,
|
||||
4.836282f, 4.844187f, 4.852030f};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores a reference to `suppression_params` and starts every spectral
// estimate from an all-zero state.
NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params)
    : suppression_params_(suppression_params) {
  parametric_noise_spectrum_.fill(0.f);
  conservative_noise_spectrum_.fill(0.f);
  prev_noise_spectrum_.fill(0.f);
  noise_spectrum_.fill(0.f);
}
|
||||
|
||||
// Saves the current noise spectrum as the "previous" spectrum before a new
// frame is analyzed.
void NoiseEstimator::PrepareAnalysis() {
  // Both members are std::array objects of the same type, so assignment
  // copies every element.
  prev_noise_spectrum_ = noise_spectrum_;
}
|
||||
|
||||
void NoiseEstimator::PreUpdate(
|
||||
int32_t num_analyzed_frames,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum) {
|
||||
quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_);
|
||||
|
||||
if (num_analyzed_frames < kShortStartupPhaseBlocks) {
|
||||
// Compute simplified noise model during startup.
|
||||
const size_t kStartBand = 5;
|
||||
float sum_log_i_log_magn = 0.f;
|
||||
float sum_log_i = 0.f;
|
||||
float sum_log_i_square = 0.f;
|
||||
float sum_log_magn = 0.f;
|
||||
for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) {
|
||||
float log_i = log_table[i];
|
||||
sum_log_i += log_i;
|
||||
sum_log_i_square += log_i * log_i;
|
||||
float log_signal = LogApproximation(signal_spectrum[i]);
|
||||
sum_log_magn += log_signal;
|
||||
sum_log_i_log_magn += log_i * log_signal;
|
||||
}
|
||||
|
||||
// Estimate the parameter for the level of the white noise.
|
||||
constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
|
||||
white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 *
|
||||
suppression_params_.over_subtraction_factor;
|
||||
|
||||
// Estimate pink noise parameters.
|
||||
float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) -
|
||||
sum_log_i * sum_log_i;
|
||||
float num =
|
||||
sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn;
|
||||
RTC_DCHECK_NE(denom, 0.f);
|
||||
float pink_noise_adjustment = num / denom;
|
||||
|
||||
// Constrain the estimated spectrum to be positive.
|
||||
pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f);
|
||||
pink_noise_numerator_ += pink_noise_adjustment;
|
||||
num = sum_log_i * sum_log_magn -
|
||||
(kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn;
|
||||
RTC_DCHECK_NE(denom, 0.f);
|
||||
pink_noise_adjustment = num / denom;
|
||||
|
||||
// Constrain the pink noise power to be in the interval [0, 1].
|
||||
pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f);
|
||||
|
||||
pink_noise_exp_ += pink_noise_adjustment;
|
||||
|
||||
const float one_by_num_analyzed_frames_plus_1 =
|
||||
1.f / (num_analyzed_frames + 1.f);
|
||||
|
||||
// Calculate the frequency-independent parts of parametric noise estimate.
|
||||
float parametric_exp = 0.f;
|
||||
float parametric_num = 0.f;
|
||||
if (pink_noise_exp_ > 0.f) {
|
||||
// Use pink noise estimate.
|
||||
parametric_num = ExpApproximation(pink_noise_numerator_ *
|
||||
one_by_num_analyzed_frames_plus_1);
|
||||
parametric_num *= num_analyzed_frames + 1.f;
|
||||
parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1;
|
||||
}
|
||||
|
||||
constexpr float kOneByShortStartupPhaseBlocks =
|
||||
1.f / kShortStartupPhaseBlocks;
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
// Estimate the background noise using the white and pink noise
|
||||
// parameters.
|
||||
if (pink_noise_exp_ == 0.f) {
|
||||
// Use white noise estimate.
|
||||
parametric_noise_spectrum_[i] = white_noise_level_;
|
||||
} else {
|
||||
// Use pink noise estimate.
|
||||
float use_band = i < kStartBand ? kStartBand : i;
|
||||
float denom = PowApproximation(use_band, parametric_exp);
|
||||
RTC_DCHECK_NE(denom, 0.f);
|
||||
parametric_noise_spectrum_[i] = parametric_num / denom;
|
||||
}
|
||||
}
|
||||
|
||||
// Weight quantile noise with modeled noise.
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
noise_spectrum_[i] *= num_analyzed_frames;
|
||||
float tmp = parametric_noise_spectrum_[i] *
|
||||
(kShortStartupPhaseBlocks - num_analyzed_frames);
|
||||
noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1;
|
||||
noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Second step of the noise estimate update for one frame.
//
// For every frequency bin, `noise_spectrum_` becomes a recursive average of
// the previous noise spectrum and `signal_spectrum`, weighted by the per-bin
// `speech_probability`. `conservative_noise_spectrum_` is only moved towards
// the signal for bins whose speech probability is below kProbRange.
//
// `speech_probability` must contain at least kFftSizeBy2Plus1 elements.
void NoiseEstimator::PostUpdate(
    rtc::ArrayView<const float> speech_probability,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
  // The view has a dynamic extent but is indexed for every bin below.
  RTC_DCHECK_GE(speech_probability.size(), kFftSizeBy2Plus1);

  // Time-avg parameter for noise_spectrum update.
  constexpr float kNoiseUpdate = 0.9f;
  // Speech probability threshold that selects between the two update rates.
  constexpr float kProbRange = .2f;

  // Note: gamma is carried over from one bin to the next, so the temporary
  // update for bin i uses the rate selected for bin i - 1.
  float gamma = kNoiseUpdate;
  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
    const float prob_speech = speech_probability[i];
    const float prob_non_speech = 1.f - prob_speech;

    // Temporary noise update used for speech frames if update value is less
    // than previous.
    float noise_update_tmp =
        gamma * prev_noise_spectrum_[i] +
        (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
                         prob_speech * prev_noise_spectrum_[i]);

    // Time-constant based on speech/noise_spectrum state.
    float gamma_old = gamma;

    // Increase gamma for frames likely to be speech.
    gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate;

    // Conservative noise_spectrum update.
    if (prob_speech < kProbRange) {
      conservative_noise_spectrum_[i] +=
          0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]);
    }

    // Noise_spectrum update.
    if (gamma == gamma_old) {
      noise_spectrum_[i] = noise_update_tmp;
    } else {
      noise_spectrum_[i] =
          gamma * prev_noise_spectrum_[i] +
          (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
                           prob_speech * prev_noise_spectrum_[i]);
      // Allow for noise_spectrum update downwards: If noise_spectrum update
      // decreases the noise_spectrum, it is safe, so allow it to happen.
      noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp);
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
77
VocieProcess/modules/audio_processing/ns/noise_estimator.h
Normal file
77
VocieProcess/modules/audio_processing/ns/noise_estimator.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
#include "modules/audio_processing/ns/quantile_noise_estimator.h"
|
||||
#include "modules/audio_processing/ns/suppression_params.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for estimating the spectral characteristics of the noise in an incoming
|
||||
// signal.
|
||||
class NoiseEstimator {
|
||||
public:
|
||||
explicit NoiseEstimator(const SuppressionParams& suppression_params);
|
||||
|
||||
// Prepare the estimator for analysis of a new frame.
|
||||
void PrepareAnalysis();
|
||||
|
||||
// Performs the first step of the estimator update.
|
||||
void PreUpdate(int32_t num_analyzed_frames,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum);
|
||||
|
||||
// Performs the second step of the estimator update.
|
||||
void PostUpdate(
|
||||
rtc::ArrayView<const float> speech_probability,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum);
|
||||
|
||||
// Returns the noise spectral estimate.
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_noise_spectrum() const {
|
||||
return noise_spectrum_;
|
||||
}
|
||||
|
||||
// Returns the noise from the previous frame.
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_prev_noise_spectrum()
|
||||
const {
|
||||
return prev_noise_spectrum_;
|
||||
}
|
||||
|
||||
// Returns a noise spectral estimate based on white and pink noise parameters.
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_parametric_noise_spectrum()
|
||||
const {
|
||||
return parametric_noise_spectrum_;
|
||||
}
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1>
|
||||
get_conservative_noise_spectrum() const {
|
||||
return conservative_noise_spectrum_;
|
||||
}
|
||||
|
||||
private:
|
||||
const SuppressionParams& suppression_params_;
|
||||
float white_noise_level_ = 0.f;
|
||||
float pink_noise_numerator_ = 0.f;
|
||||
float pink_noise_exp_ = 0.f;
|
||||
std::array<float, kFftSizeBy2Plus1> prev_noise_spectrum_;
|
||||
std::array<float, kFftSizeBy2Plus1> conservative_noise_spectrum_;
|
||||
std::array<float, kFftSizeBy2Plus1> parametric_noise_spectrum_;
|
||||
std::array<float, kFftSizeBy2Plus1> noise_spectrum_;
|
||||
QuantileNoiseEstimator quantile_noise_estimator_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
|
556
VocieProcess/modules/audio_processing/ns/noise_suppressor.cc
Normal file
556
VocieProcess/modules/audio_processing/ns/noise_suppressor.cc
Normal file
@ -0,0 +1,556 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/noise_suppressor.h"
|
||||
|
||||
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <limits>
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Maps sample rate to number of bands.
|
||||
size_t NumBandsForRate(size_t sample_rate_hz) {
|
||||
RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000 ||
|
||||
sample_rate_hz == 48000);
|
||||
return sample_rate_hz / 16000;
|
||||
}
|
||||
|
||||
// Largest channel count whose per-channel data is kept on the stack. Above
// this count the data lives in scratch memory pre-allocated on the heap. The
// split avoids spending heap space on the common channel counts without
// capping the number of supported channels at a fixed stack-array size.
constexpr size_t kMaxNumChannelsOnStack = 2;

// Returns how many channels need heap storage: `num_channels` when it exceeds
// kMaxNumChannelsOnStack, zero otherwise.
size_t NumChannelsOnHeap(size_t num_channels) {
  if (num_channels <= kMaxNumChannelsOnStack) {
    return 0;
  }
  return num_channels;
}
|
||||
|
||||
// Hybrid Hanning and flat window for the filterbank.
|
||||
constexpr std::array<float, 96> kBlocks160w256FirstHalf = {
|
||||
0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f,
|
||||
0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f,
|
||||
0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f,
|
||||
0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f,
|
||||
0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f,
|
||||
0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f,
|
||||
0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f,
|
||||
0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f,
|
||||
0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f,
|
||||
0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f,
|
||||
0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f,
|
||||
0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f,
|
||||
0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f,
|
||||
0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f,
|
||||
0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f,
|
||||
0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f,
|
||||
0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f,
|
||||
0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f,
|
||||
0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f,
|
||||
0.99986614f};
|
||||
|
||||
// Applies the filterbank window to a buffer.
|
||||
void ApplyFilterBankWindow(rtc::ArrayView<float, kFftSize> x) {
|
||||
for (size_t i = 0; i < 96; ++i) {
|
||||
x[i] = kBlocks160w256FirstHalf[i] * x[i];
|
||||
}
|
||||
|
||||
for (size_t i = 161, k = 95; i < kFftSize; ++i, --k) {
|
||||
RTC_DCHECK_NE(0, k);
|
||||
x[i] = kBlocks160w256FirstHalf[k] * x[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Extends a frame with previous data.
|
||||
void FormExtendedFrame(rtc::ArrayView<const float, kNsFrameSize> frame,
|
||||
rtc::ArrayView<float, kFftSize - kNsFrameSize> old_data,
|
||||
rtc::ArrayView<float, kFftSize> extended_frame) {
|
||||
std::copy(old_data.begin(), old_data.end(), extended_frame.begin());
|
||||
std::copy(frame.begin(), frame.end(),
|
||||
extended_frame.begin() + old_data.size());
|
||||
std::copy(extended_frame.end() - old_data.size(), extended_frame.end(),
|
||||
old_data.begin());
|
||||
}
|
||||
|
||||
// Uses overlap-and-add to produce an output frame.
|
||||
void OverlapAndAdd(rtc::ArrayView<const float, kFftSize> extended_frame,
|
||||
rtc::ArrayView<float, kOverlapSize> overlap_memory,
|
||||
rtc::ArrayView<float, kNsFrameSize> output_frame) {
|
||||
for (size_t i = 0; i < kOverlapSize; ++i) {
|
||||
output_frame[i] = overlap_memory[i] + extended_frame[i];
|
||||
}
|
||||
std::copy(extended_frame.begin() + kOverlapSize,
|
||||
extended_frame.begin() + kNsFrameSize,
|
||||
output_frame.begin() + kOverlapSize);
|
||||
std::copy(extended_frame.begin() + kNsFrameSize, extended_frame.end(),
|
||||
overlap_memory.begin());
|
||||
}
|
||||
|
||||
// Produces a delayed frame.
|
||||
void DelaySignal(rtc::ArrayView<const float, kNsFrameSize> frame,
|
||||
rtc::ArrayView<float, kFftSize - kNsFrameSize> delay_buffer,
|
||||
rtc::ArrayView<float, kNsFrameSize> delayed_frame) {
|
||||
constexpr size_t kSamplesFromFrame = kNsFrameSize - (kFftSize - kNsFrameSize);
|
||||
std::copy(delay_buffer.begin(), delay_buffer.end(), delayed_frame.begin());
|
||||
std::copy(frame.begin(), frame.begin() + kSamplesFromFrame,
|
||||
delayed_frame.begin() + delay_buffer.size());
|
||||
|
||||
std::copy(frame.begin() + kSamplesFromFrame, frame.end(),
|
||||
delay_buffer.begin());
|
||||
}
|
||||
|
||||
// Computes the energy of an extended frame.
|
||||
float ComputeEnergyOfExtendedFrame(rtc::ArrayView<const float, kFftSize> x) {
|
||||
float energy = 0.f;
|
||||
for (float x_k : x) {
|
||||
energy += x_k * x_k;
|
||||
}
|
||||
|
||||
return energy;
|
||||
}
|
||||
|
||||
// Computes the energy of an extended frame based on its subcomponents.
|
||||
float ComputeEnergyOfExtendedFrame(
|
||||
rtc::ArrayView<const float, kNsFrameSize> frame,
|
||||
rtc::ArrayView<float, kFftSize - kNsFrameSize> old_data) {
|
||||
float energy = 0.f;
|
||||
for (float v : old_data) {
|
||||
energy += v * v;
|
||||
}
|
||||
for (float v : frame) {
|
||||
energy += v * v;
|
||||
}
|
||||
|
||||
return energy;
|
||||
}
|
||||
|
||||
// Computes the magnitude spectrum based on an FFT output.
|
||||
void ComputeMagnitudeSpectrum(
|
||||
rtc::ArrayView<const float, kFftSize> real,
|
||||
rtc::ArrayView<const float, kFftSize> imag,
|
||||
rtc::ArrayView<float, kFftSizeBy2Plus1> signal_spectrum) {
|
||||
signal_spectrum[0] = fabsf(real[0]) + 1.f;
|
||||
signal_spectrum[kFftSizeBy2Plus1 - 1] =
|
||||
fabsf(real[kFftSizeBy2Plus1 - 1]) + 1.f;
|
||||
|
||||
for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
|
||||
signal_spectrum[i] =
|
||||
SqrtFastApproximation(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute prior and post SNR.
|
||||
void ComputeSnr(rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
|
||||
rtc::ArrayView<const float> prev_signal_spectrum,
|
||||
rtc::ArrayView<const float> signal_spectrum,
|
||||
rtc::ArrayView<const float> prev_noise_spectrum,
|
||||
rtc::ArrayView<const float> noise_spectrum,
|
||||
rtc::ArrayView<float> prior_snr,
|
||||
rtc::ArrayView<float> post_snr) {
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
// Previous post SNR.
|
||||
// Previous estimate: based on previous frame with gain filter.
|
||||
float prev_estimate = prev_signal_spectrum[i] /
|
||||
(prev_noise_spectrum[i] + 0.0001f) * filter[i];
|
||||
// Post SNR.
|
||||
if (signal_spectrum[i] > noise_spectrum[i]) {
|
||||
post_snr[i] = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f;
|
||||
} else {
|
||||
post_snr[i] = 0.f;
|
||||
}
|
||||
// The directed decision estimate of the prior SNR is a sum the current and
|
||||
// previous estimates.
|
||||
prior_snr[i] = 0.98f * prev_estimate + (1.f - 0.98f) * post_snr[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the attenuating gain for the noise suppression of the upper bands.
|
||||
float ComputeUpperBandsGain(
|
||||
float minimum_attenuating_gain,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
|
||||
rtc::ArrayView<const float> speech_probability,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
|
||||
// Average speech prob and filter gain for the end of the lowest band.
|
||||
constexpr int kNumAvgBins = 32;
|
||||
constexpr float kOneByNumAvgBins = 1.f / kNumAvgBins;
|
||||
|
||||
float avg_prob_speech = 0.f;
|
||||
float avg_filter_gain = 0.f;
|
||||
for (size_t i = kFftSizeBy2Plus1 - kNumAvgBins - 1; i < kFftSizeBy2Plus1 - 1;
|
||||
i++) {
|
||||
avg_prob_speech += speech_probability[i];
|
||||
avg_filter_gain += filter[i];
|
||||
}
|
||||
avg_prob_speech = avg_prob_speech * kOneByNumAvgBins;
|
||||
avg_filter_gain = avg_filter_gain * kOneByNumAvgBins;
|
||||
|
||||
// If the speech was suppressed by a component between Analyze and Process, an
|
||||
// example being by an AEC, it should not be considered speech for the purpose
|
||||
// of high band suppression. To that end, the speech probability is scaled
|
||||
// accordingly.
|
||||
float sum_analysis_spectrum = 0.f;
|
||||
float sum_processing_spectrum = 0.f;
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
sum_analysis_spectrum += prev_analysis_signal_spectrum[i];
|
||||
sum_processing_spectrum += signal_spectrum[i];
|
||||
}
|
||||
|
||||
// The magnitude spectrum computation enforces the spectrum to be strictly
|
||||
// positive.
|
||||
RTC_DCHECK_GT(sum_analysis_spectrum, 0.f);
|
||||
avg_prob_speech *= sum_processing_spectrum / sum_analysis_spectrum;
|
||||
|
||||
// Compute gain based on speech probability.
|
||||
float gain =
|
||||
0.5f * (1.f + static_cast<float>(tanh(2.f * avg_prob_speech - 1.f)));
|
||||
|
||||
// Combine gain with low band gain.
|
||||
if (avg_prob_speech >= 0.5f) {
|
||||
gain = 0.25f * gain + 0.75f * avg_filter_gain;
|
||||
} else {
|
||||
gain = 0.5f * gain + 0.5f * avg_filter_gain;
|
||||
}
|
||||
|
||||
// Make sure gain is within flooring range.
|
||||
return std::min(std::max(gain, minimum_attenuating_gain), 1.f);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Sets up the per-channel state. The Wiener filter and noise estimator are
// constructed from `suppression_params`, one delay buffer is allocated per
// band above the first, and all buffers are zeroed except the previous
// analysis spectrum, which starts at 1 in every bin.
NoiseSuppressor::ChannelState::ChannelState(
    const SuppressionParams& suppression_params,
    size_t num_bands)
    : wiener_filter(suppression_params),
      noise_estimator(suppression_params),
      process_delay_memory(num_bands > 1 ? num_bands - 1 : 0) {
  prev_analysis_signal_spectrum.fill(1.f);

  analyze_analysis_memory.fill(0.f);
  process_analysis_memory.fill(0.f);
  process_synthesis_memory.fill(0.f);
  for (auto& band_delay : process_delay_memory) {
    band_delay.fill(0.f);
  }
}
|
||||
|
||||
// Creates a suppressor for `num_channels` channels at `sample_rate_hz`. The
// suppression parameters are built from `config.target_level`. The heap
// scratch buffers are sized by NumChannelsOnHeap(), and one ChannelState is
// allocated per channel.
NoiseSuppressor::NoiseSuppressor(const NsConfig& config,
                                 size_t sample_rate_hz,
                                 size_t num_channels)
    : num_bands_(NumBandsForRate(sample_rate_hz)),
      num_channels_(num_channels),
      suppression_params_(config.target_level),
      filter_bank_states_heap_(NumChannelsOnHeap(num_channels_)),
      upper_band_gains_heap_(NumChannelsOnHeap(num_channels_)),
      energies_before_filtering_heap_(NumChannelsOnHeap(num_channels_)),
      gain_adjustments_heap_(NumChannelsOnHeap(num_channels_)),
      channels_(num_channels_) {
  // `channels_` already holds num_channels_ empty slots; fill each one.
  for (auto& channel : channels_) {
    channel = std::make_unique<ChannelState>(suppression_params_, num_bands_);
  }
}
|
||||
|
||||
// Writes to `filter` the per-bin minimum of the Wiener filter gains over all
// channels.
void NoiseSuppressor::AggregateWienerFilters(
    rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const {
  // Start from the first channel's filter.
  rtc::ArrayView<const float, kFftSizeBy2Plus1> first_filter =
      channels_[0]->wiener_filter.get_filter();
  std::copy(first_filter.begin(), first_filter.end(), filter.begin());

  // Lower each bin to the smallest gain found in the remaining channels.
  for (size_t ch = 1; ch < num_channels_; ++ch) {
    rtc::ArrayView<const float, kFftSizeBy2Plus1> channel_filter =
        channels_[ch]->wiener_filter.get_filter();

    for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
      if (channel_filter[bin] < filter[bin]) {
        filter[bin] = channel_filter[bin];
      }
    }
  }
}
|
||||
|
||||
void NoiseSuppressor::Analyze(const AudioBuffer& audio) {
|
||||
// Prepare the noise estimator for the analysis stage.
|
||||
for (size_t ch = 0; ch < num_channels_; ++ch) {
|
||||
channels_[ch]->noise_estimator.PrepareAnalysis();
|
||||
}
|
||||
|
||||
// Check for zero frames.
|
||||
bool zero_frame = true;
|
||||
for (size_t ch = 0; ch < num_channels_; ++ch) {
|
||||
rtc::ArrayView<const float, kNsFrameSize> y_band0(
|
||||
&audio.split_bands_const(ch)[0][0], kNsFrameSize);
|
||||
float energy = ComputeEnergyOfExtendedFrame(
|
||||
y_band0, channels_[ch]->analyze_analysis_memory);
|
||||
if (energy > 0.f) {
|
||||
zero_frame = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (zero_frame) {
|
||||
// We want to avoid updating statistics in this case:
|
||||
// Updating feature statistics when we have zeros only will cause
|
||||
// thresholds to move towards zero signal situations. This in turn has the
|
||||
// effect that once the signal is "turned on" (non-zero values) everything
|
||||
// will be treated as speech and there is no noise suppression effect.
|
||||
// Depending on the duration of the inactive signal it takes a
|
||||
// considerable amount of time for the system to learn what is noise and
|
||||
// what is speech.
|
||||
return;
|
||||
}
|
||||
|
||||
// Only update analysis counter for frames that are properly analyzed.
|
||||
if (++num_analyzed_frames_ < 0) {
|
||||
num_analyzed_frames_ = 0;
|
||||
}
|
||||
|
||||
// Analyze all channels.
|
||||
for (size_t ch = 0; ch < num_channels_; ++ch) {
|
||||
std::unique_ptr<ChannelState>& ch_p = channels_[ch];
|
||||
rtc::ArrayView<const float, kNsFrameSize> y_band0(
|
||||
&audio.split_bands_const(ch)[0][0], kNsFrameSize);
|
||||
|
||||
// Form an extended frame and apply analysis filter bank windowing.
|
||||
std::array<float, kFftSize> extended_frame;
|
||||
FormExtendedFrame(y_band0, ch_p->analyze_analysis_memory, extended_frame);
|
||||
ApplyFilterBankWindow(extended_frame);
|
||||
|
||||
// Compute the magnitude spectrum.
|
||||
std::array<float, kFftSize> real;
|
||||
std::array<float, kFftSize> imag;
|
||||
fft_.Fft(extended_frame, real, imag);
|
||||
|
||||
std::array<float, kFftSizeBy2Plus1> signal_spectrum;
|
||||
ComputeMagnitudeSpectrum(real, imag, signal_spectrum);
|
||||
|
||||
// Compute energies.
|
||||
float signal_energy = 0.f;
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
signal_energy += real[i] * real[i] + imag[i] * imag[i];
|
||||
}
|
||||
signal_energy /= kFftSizeBy2Plus1;
|
||||
|
||||
float signal_spectral_sum = 0.f;
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
signal_spectral_sum += signal_spectrum[i];
|
||||
}
|
||||
|
||||
// Estimate the noise spectra and the probability estimates of speech
|
||||
// presence.
|
||||
ch_p->noise_estimator.PreUpdate(num_analyzed_frames_, signal_spectrum,
|
||||
signal_spectral_sum);
|
||||
|
||||
std::array<float, kFftSizeBy2Plus1> post_snr;
|
||||
std::array<float, kFftSizeBy2Plus1> prior_snr;
|
||||
ComputeSnr(ch_p->wiener_filter.get_filter(),
|
||||
ch_p->prev_analysis_signal_spectrum, signal_spectrum,
|
||||
ch_p->noise_estimator.get_prev_noise_spectrum(),
|
||||
ch_p->noise_estimator.get_noise_spectrum(), prior_snr, post_snr);
|
||||
|
||||
ch_p->speech_probability_estimator.Update(
|
||||
num_analyzed_frames_, prior_snr, post_snr,
|
||||
ch_p->noise_estimator.get_conservative_noise_spectrum(),
|
||||
signal_spectrum, signal_spectral_sum, signal_energy);
|
||||
|
||||
ch_p->noise_estimator.PostUpdate(
|
||||
ch_p->speech_probability_estimator.get_probability(), signal_spectrum);
|
||||
|
||||
// Store the magnitude spectrum to make it available for the process
|
||||
// method.
|
||||
std::copy(signal_spectrum.begin(), signal_spectrum.end(),
|
||||
ch_p->prev_analysis_signal_spectrum.begin());
|
||||
}
|
||||
}
|
||||
|
||||
// Applies noise suppression in place to the split bands of `audio`.
//
// For every channel the lowest band is windowed, transformed and used to
// update that channel's Wiener filter. Unless the capture output is flagged as
// unused, the filter is then applied in the frequency domain, the result is
// synthesized back with overlap-and-add, the upper bands (if any) are delayed
// and attenuated with a time-domain gain, and all bands are limited to the
// range [-32768, 32767].
void NoiseSuppressor::Process(AudioBuffer* audio) {
  // Scratch storage. The fixed-size stack arrays are used unless
  // NumChannelsOnHeap() reports that heap storage is needed, in which case the
  // member vectors are used instead.
  std::array<FilterBankState, kMaxNumChannelsOnStack> stack_states;
  std::array<float, kMaxNumChannelsOnStack> stack_upper_band_gains;
  std::array<float, kMaxNumChannelsOnStack> stack_energies;
  std::array<float, kMaxNumChannelsOnStack> stack_gain_adjustments;

  const bool use_heap = NumChannelsOnHeap(num_channels_) > 0;
  rtc::ArrayView<FilterBankState> filter_bank_states(
      use_heap ? filter_bank_states_heap_.data() : stack_states.data(),
      num_channels_);
  rtc::ArrayView<float> upper_band_gains(
      use_heap ? upper_band_gains_heap_.data()
               : stack_upper_band_gains.data(),
      num_channels_);
  rtc::ArrayView<float> energies_before_filtering(
      use_heap ? energies_before_filtering_heap_.data()
               : stack_energies.data(),
      num_channels_);
  rtc::ArrayView<float> gain_adjustments(
      use_heap ? gain_adjustments_heap_.data()
               : stack_gain_adjustments.data(),
      num_channels_);

  // Analysis stage: compute the suppression filter of each channel.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    ChannelState& channel = *channels_[ch];
    FilterBankState& bank = filter_bank_states[ch];

    // Build the windowed extended frame from the lowest band.
    rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
                                                kNsFrameSize);
    FormExtendedFrame(y_band0, channel.process_analysis_memory,
                      bank.extended_frame);
    ApplyFilterBankWindow(bank.extended_frame);

    // Remember the energy before filtering; it is compared with the energy
    // after filtering further down.
    energies_before_filtering[ch] =
        ComputeEnergyOfExtendedFrame(bank.extended_frame);

    // Transform to the frequency domain and form the magnitude spectrum.
    fft_.Fft(bank.extended_frame, bank.real, bank.imag);
    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
    ComputeMagnitudeSpectrum(bank.real, bank.imag, signal_spectrum);

    // Update the frequency-domain gain filter used for noise attenuation.
    channel.wiener_filter.Update(
        num_analyzed_frames_, channel.noise_estimator.get_noise_spectrum(),
        channel.noise_estimator.get_prev_noise_spectrum(),
        channel.noise_estimator.get_parametric_noise_spectrum(),
        signal_spectrum);

    if (num_bands_ > 1) {
      // Time-domain gain used to attenuate the noise in the upper bands.
      upper_band_gains[ch] = ComputeUpperBandsGain(
          suppression_params_.minimum_attenuating_gain,
          channel.wiener_filter.get_filter(),
          channel.speech_probability_estimator.get_probability(),
          channel.prev_analysis_signal_spectrum, signal_spectrum);
    }
  }

  // Everything below modifies the audio; skip it when the output of the audio
  // processing module is not used.
  if (!capture_output_used_) {
    return;
  }

  // Pick the filter to apply: the single channel's own filter, or an
  // aggregate over all channels.
  std::array<float, kFftSizeBy2Plus1> filter_data;
  rtc::ArrayView<const float, kFftSizeBy2Plus1> filter = filter_data;
  if (num_channels_ != 1) {
    AggregateWienerFilters(filter_data);
  } else {
    filter = channels_[0]->wiener_filter.get_filter();
  }

  // Apply the filter to the lower band spectrum of every channel.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    FilterBankState& bank = filter_bank_states[ch];
    for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
      const float gain = filter[bin];
      bank.real[bin] *= gain;
      bank.imag[bin] *= gain;
    }
  }

  // Filter bank synthesis: back to the time domain.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    FilterBankState& bank = filter_bank_states[ch];
    fft_.Ifft(bank.real, bank.imag, bank.extended_frame);
  }

  for (size_t ch = 0; ch < num_channels_; ++ch) {
    ChannelState& channel = *channels_[ch];
    FilterBankState& bank = filter_bank_states[ch];

    // The energy is measured before the synthesis window is applied.
    const float energy_after_filtering =
        ComputeEnergyOfExtendedFrame(bank.extended_frame);

    // Apply synthesis window.
    ApplyFilterBankWindow(bank.extended_frame);

    // Per-channel adjustment of the attenuation, based on the effect the
    // filtering had on the frame energy.
    gain_adjustments[ch] = channel.wiener_filter.ComputeOverallScalingFactor(
        num_analyzed_frames_,
        channel.speech_probability_estimator.get_prior_probability(),
        energies_before_filtering[ch], energy_after_filtering);
  }

  // Use the smallest adjustment over all channels and apply it to every
  // channel.
  float common_gain_adjustment = gain_adjustments[0];
  for (size_t ch = 1; ch < num_channels_; ++ch) {
    common_gain_adjustment =
        std::min(common_gain_adjustment, gain_adjustments[ch]);
  }
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (float& sample : filter_bank_states[ch].extended_frame) {
      sample = common_gain_adjustment * sample;
    }
  }

  // Use overlap-and-add to form the output frame of the lowest band.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
                                                kNsFrameSize);
    OverlapAndAdd(filter_bank_states[ch].extended_frame,
                  channels_[ch]->process_synthesis_memory, y_band0);
  }

  if (num_bands_ > 1) {
    // Use the smallest upper band gain over all channels.
    float common_upper_band_gain = upper_band_gains[0];
    for (size_t ch = 1; ch < num_channels_; ++ch) {
      common_upper_band_gain =
          std::min(common_upper_band_gain, upper_band_gains[ch]);
    }

    // Process the upper bands.
    for (size_t ch = 0; ch < num_channels_; ++ch) {
      for (size_t b = 1; b < num_bands_; ++b) {
        // Delay the upper bands to match the delay of the filterbank applied
        // to the lowest band.
        rtc::ArrayView<float, kNsFrameSize> y_band(
            &audio->split_bands(ch)[b][0], kNsFrameSize);
        std::array<float, kNsFrameSize> delayed_frame;
        DelaySignal(y_band, channels_[ch]->process_delay_memory[b - 1],
                    delayed_frame);

        // Apply the time-domain noise-attenuating gain.
        for (size_t j = 0; j < kNsFrameSize; ++j) {
          y_band[j] = common_upper_band_gain * delayed_frame[j];
        }
      }
    }
  }

  // Limit the output to the allowed range.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (size_t b = 0; b < num_bands_; ++b) {
      rtc::ArrayView<float, kNsFrameSize> y_band(&audio->split_bands(ch)[b][0],
                                                 kNsFrameSize);
      for (float& sample : y_band) {
        sample = std::min(std::max(sample, -32768.f), 32767.f);
      }
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
92
VocieProcess/modules/audio_processing/ns/noise_suppressor.h
Normal file
92
VocieProcess/modules/audio_processing/ns/noise_suppressor.h
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/ns/noise_estimator.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
#include "modules/audio_processing/ns/ns_config.h"
|
||||
#include "modules/audio_processing/ns/ns_fft.h"
|
||||
#include "modules/audio_processing/ns/speech_probability_estimator.h"
|
||||
#include "modules/audio_processing/ns/wiener_filter.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for suppressing noise in a signal.
|
||||
class NoiseSuppressor {
|
||||
public:
|
||||
NoiseSuppressor(const NsConfig& config,
|
||||
size_t sample_rate_hz,
|
||||
size_t num_channels);
|
||||
NoiseSuppressor(const NoiseSuppressor&) = delete;
|
||||
NoiseSuppressor& operator=(const NoiseSuppressor&) = delete;
|
||||
|
||||
// Analyses the signal (typically applied before the AEC to avoid analyzing
|
||||
// any comfort noise signal).
|
||||
void Analyze(const AudioBuffer& audio);
|
||||
|
||||
// Applies noise suppression.
|
||||
void Process(AudioBuffer* audio);
|
||||
|
||||
// Specifies whether the capture output will be used. The purpose of this is
|
||||
// to allow the noise suppressor to deactivate some of the processing when the
|
||||
// resulting output is anyway not used, for instance when the endpoint is
|
||||
// muted.
|
||||
void SetCaptureOutputUsage(bool capture_output_used) {
|
||||
capture_output_used_ = capture_output_used;
|
||||
}
|
||||
|
||||
private:
|
||||
const size_t num_bands_;
|
||||
const size_t num_channels_;
|
||||
const SuppressionParams suppression_params_;
|
||||
int32_t num_analyzed_frames_ = -1;
|
||||
NrFft fft_;
|
||||
bool capture_output_used_ = true;
|
||||
|
||||
struct ChannelState {
|
||||
ChannelState(const SuppressionParams& suppression_params, size_t num_bands);
|
||||
|
||||
SpeechProbabilityEstimator speech_probability_estimator;
|
||||
WienerFilter wiener_filter;
|
||||
NoiseEstimator noise_estimator;
|
||||
std::array<float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum;
|
||||
std::array<float, kFftSize - kNsFrameSize> analyze_analysis_memory;
|
||||
std::array<float, kOverlapSize> process_analysis_memory;
|
||||
std::array<float, kOverlapSize> process_synthesis_memory;
|
||||
std::vector<std::array<float, kOverlapSize>> process_delay_memory;
|
||||
};
|
||||
|
||||
struct FilterBankState {
|
||||
std::array<float, kFftSize> real;
|
||||
std::array<float, kFftSize> imag;
|
||||
std::array<float, kFftSize> extended_frame;
|
||||
};
|
||||
|
||||
std::vector<FilterBankState> filter_bank_states_heap_;
|
||||
std::vector<float> upper_band_gains_heap_;
|
||||
std::vector<float> energies_before_filtering_heap_;
|
||||
std::vector<float> gain_adjustments_heap_;
|
||||
std::vector<std::unique_ptr<ChannelState>> channels_;
|
||||
|
||||
// Aggregates the Wiener filters into a single filter to use.
|
||||
void AggregateWienerFilters(
|
||||
rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
|
34
VocieProcess/modules/audio_processing/ns/ns_common.h
Normal file
34
VocieProcess/modules/audio_processing/ns/ns_common.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Sizes shared by the noise suppressor components.
// Length of the FFT applied to each extended frame.
constexpr size_t kFftSize = 256;
// Number of spectrum bins kept per frame.
constexpr size_t kFftSizeBy2Plus1 = kFftSize / 2 + 1;
// Number of samples taken from one band per frame.
constexpr size_t kNsFrameSize = 160;
// Part of the FFT length that is not covered by a single frame.
constexpr size_t kOverlapSize = kFftSize - kNsFrameSize;

// Durations expressed in blocks.
constexpr int kShortStartupPhaseBlocks = 50;
constexpr int kLongStartupPhaseBlocks = 200;
constexpr int kFeatureUpdateWindowSize = 500;

// Threshold for the LRT feature.
constexpr float kLtrFeatureThr = 0.5f;
// Bin widths of the LRT, spectral flatness and spectral difference
// histograms.
constexpr float kBinSizeLrt = 0.1f;
constexpr float kBinSizeSpecFlat = 0.05f;
constexpr float kBinSizeSpecDiff = 0.1f;
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
|
24
VocieProcess/modules/audio_processing/ns/ns_config.h
Normal file
24
VocieProcess/modules/audio_processing/ns/ns_config.h
Normal file
@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Configuration of the noise suppressor.
struct NsConfig {
  // Selectable suppression levels.
  enum class SuppressionLevel {
    k6dB,
    k12dB,
    k18dB,
    k21dB
  };

  // Requested suppression level; k12dB unless set otherwise.
  SuppressionLevel target_level{SuppressionLevel::k12dB};
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
|
64
VocieProcess/modules/audio_processing/ns/ns_fft.cc
Normal file
64
VocieProcess/modules/audio_processing/ns/ns_fft.cc
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/ns_fft.h"
|
||||
|
||||
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Allocates the work areas handed to WebRtc_rdft and runs one transform on an
// all-zero buffer so that the work areas are initialized before first use.
NrFft::NrFft() : bit_reversal_state_(kFftSize / 2), tables_(kFftSize / 2) {
  // Initialize WebRtc_rdft: setting bit_reversal_state_[0] to 0 triggers the
  // initialization. The element is an integer, so an integer literal is used.
  bit_reversal_state_[0] = 0;

  // Value-initialization zeroes the buffer.
  std::array<float, kFftSize> tmp_buffer{};
  WebRtc_rdft(kFftSize, 1, tmp_buffer.data(), bit_reversal_state_.data(),
              tables_.data());
}
|
||||
|
||||
// Transforms `time_data` to the frequency domain. The transform is done in
// place (so `time_data` is overwritten) and the result is then unpacked into
// the first kFftSizeBy2Plus1 elements of `real` and `imag`.
void NrFft::Fft(rtc::ArrayView<float, kFftSize> time_data,
                rtc::ArrayView<float, kFftSize> real,
                rtc::ArrayView<float, kFftSize> imag) {
  WebRtc_rdft(kFftSize, 1, time_data.data(), bit_reversal_state_.data(),
              tables_.data());

  constexpr size_t kLastBin = kFftSizeBy2Plus1 - 1;

  // The first and last bins have no imaginary part; their real parts are read
  // from the first two elements of the transformed buffer.
  imag[0] = 0;
  real[0] = time_data[0];

  imag[kLastBin] = 0;
  real[kLastBin] = time_data[1];

  // The remaining bins are read as interleaved (real, imag) pairs.
  for (size_t bin = 1, src = 2; bin < kLastBin; ++bin, src += 2) {
    real[bin] = time_data[src];
    imag[bin] = time_data[src + 1];
  }
}
|
||||
|
||||
// Transforms the spectrum given by `real` and `imag` back to the time domain.
// The bins are first packed into `time_data`, which is then transformed in
// place and scaled by 2 / kFftSize.
void NrFft::Ifft(rtc::ArrayView<const float> real,
                 rtc::ArrayView<const float> imag,
                 rtc::ArrayView<float> time_data) {
  constexpr size_t kLastBin = kFftSizeBy2Plus1 - 1;

  // The real parts of the first and last bins go into the first two elements;
  // all other bins are stored as interleaved (real, imag) pairs.
  time_data[0] = real[0];
  time_data[1] = real[kLastBin];
  for (size_t bin = 1; bin < kLastBin; ++bin) {
    const size_t dst = 2 * bin;
    time_data[dst] = real[bin];
    time_data[dst + 1] = imag[bin];
  }

  WebRtc_rdft(kFftSize, -1, time_data.data(), bit_reversal_state_.data(),
              tables_.data());

  // Scale the output.
  constexpr float kScaling = 2.f / kFftSize;
  for (size_t i = 0; i < time_data.size(); ++i) {
    time_data[i] *= kScaling;
  }
}
|
||||
|
||||
} // namespace webrtc
|
45
VocieProcess/modules/audio_processing/ns/ns_fft.h
Normal file
45
VocieProcess/modules/audio_processing/ns/ns_fft.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Wrapper class providing 256 point FFT functionality.
|
||||
class NrFft {
|
||||
public:
|
||||
NrFft();
|
||||
NrFft(const NrFft&) = delete;
|
||||
NrFft& operator=(const NrFft&) = delete;
|
||||
|
||||
// Transforms the signal from time to frequency domain.
|
||||
void Fft(rtc::ArrayView<float, kFftSize> time_data,
|
||||
rtc::ArrayView<float, kFftSize> real,
|
||||
rtc::ArrayView<float, kFftSize> imag);
|
||||
|
||||
// Transforms the signal from frequency to time domain.
|
||||
void Ifft(rtc::ArrayView<const float> real,
|
||||
rtc::ArrayView<const float> imag,
|
||||
rtc::ArrayView<float> time_data);
|
||||
|
||||
private:
|
||||
std::vector<size_t> bit_reversal_state_;
|
||||
std::vector<float> tables_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
|
@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/prior_signal_model.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
PriorSignalModel::PriorSignalModel(float lrt_initial_value)
|
||||
: lrt(lrt_initial_value) {}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Parameters of the prior signal model. The struct is non-copyable and must
// be constructed with an explicit initial LRT value.
struct PriorSignalModel {
  explicit PriorSignalModel(float lrt_initial_value);
  PriorSignalModel(const PriorSignalModel&) = delete;
  PriorSignalModel& operator=(const PriorSignalModel&) = delete;

  // LRT value, set from the constructor argument.
  float lrt;
  // Thresholds for the spectral flatness and template difference features.
  float flatness_threshold{.5f};
  float template_diff_threshold{.5f};
  // Weights of the LRT, flatness and difference features. By default only
  // the LRT feature carries weight.
  float lrt_weighting{1.f};
  float flatness_weighting{0.f};
  float difference_weighting{0.f};
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
|
@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/prior_signal_model_estimator.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Identifies the first of the two largest peaks in the histogram.
|
||||
void FindFirstOfTwoLargestPeaks(
|
||||
float bin_size,
|
||||
rtc::ArrayView<const int, kHistogramSize> spectral_flatness,
|
||||
float* peak_position,
|
||||
int* peak_weight) {
|
||||
RTC_DCHECK(peak_position);
|
||||
RTC_DCHECK(peak_weight);
|
||||
|
||||
int peak_value = 0;
|
||||
int secondary_peak_value = 0;
|
||||
*peak_position = 0.f;
|
||||
float secondary_peak_position = 0.f;
|
||||
*peak_weight = 0;
|
||||
int secondary_peak_weight = 0;
|
||||
|
||||
// Identify the two largest peaks.
|
||||
for (int i = 0; i < kHistogramSize; ++i) {
|
||||
const float bin_mid = (i + 0.5f) * bin_size;
|
||||
if (spectral_flatness[i] > peak_value) {
|
||||
// Found new "first" peak candidate.
|
||||
secondary_peak_value = peak_value;
|
||||
secondary_peak_weight = *peak_weight;
|
||||
secondary_peak_position = *peak_position;
|
||||
|
||||
peak_value = spectral_flatness[i];
|
||||
*peak_weight = spectral_flatness[i];
|
||||
*peak_position = bin_mid;
|
||||
} else if (spectral_flatness[i] > secondary_peak_value) {
|
||||
// Found new "second" peak candidate.
|
||||
secondary_peak_value = spectral_flatness[i];
|
||||
secondary_peak_weight = spectral_flatness[i];
|
||||
secondary_peak_position = bin_mid;
|
||||
}
|
||||
}
|
||||
|
||||
// Merge the peaks if they are close.
|
||||
if ((fabs(secondary_peak_position - *peak_position) < 2 * bin_size) &&
|
||||
(secondary_peak_weight > 0.5f * (*peak_weight))) {
|
||||
*peak_weight += secondary_peak_weight;
|
||||
*peak_position = 0.5f * (*peak_position + secondary_peak_position);
|
||||
}
|
||||
}
|
||||
|
||||
// Derives the LRT value of the prior model from the LRT histogram.
//
// `*low_lrt_fluctuations` is set to true when the fluctuation measure computed
// from the histogram is below 0.05. In that case `*prior_model_lrt` is set to
// 1; otherwise it is 1.2 times the mean of the ten lowest bins, limited to
// the range [0.2, 1].
void UpdateLrt(rtc::ArrayView<const int, kHistogramSize> lrt_histogram,
               float* prior_model_lrt,
               bool* low_lrt_fluctuations) {
  RTC_DCHECK(prior_model_lrt);
  RTC_DCHECK(low_lrt_fluctuations);

  // Mean bin position, computed over the ten lowest bins only.
  float low_bins_mean = 0.f;
  int low_bins_count = 0;
  for (int bin = 0; bin < 10; ++bin) {
    const float bin_mid = (bin + 0.5f) * kBinSizeLrt;
    low_bins_mean += lrt_histogram[bin] * bin_mid;
    low_bins_count += lrt_histogram[bin];
  }
  if (low_bins_count > 0) {
    low_bins_mean = low_bins_mean / low_bins_count;
  }

  // First and second moments over the whole histogram, normalized by the
  // feature update window size.
  float first_moment = 0.f;
  float second_moment = 0.f;
  for (int bin = 0; bin < kHistogramSize; ++bin) {
    const float bin_mid = (bin + 0.5f) * kBinSizeLrt;
    second_moment += lrt_histogram[bin] * bin_mid * bin_mid;
    first_moment += lrt_histogram[bin] * bin_mid;
  }
  constexpr float kOneFeatureUpdateWindowSize = 1.f / kFeatureUpdateWindowSize;
  second_moment = second_moment * kOneFeatureUpdateWindowSize;
  first_moment = first_moment * kOneFeatureUpdateWindowSize;

  // Fluctuation limit of LRT feature.
  const bool fluctuations_are_low =
      second_moment - low_bins_mean * first_moment < 0.05f;
  *low_lrt_fluctuations = fluctuations_are_low;

  // Get threshold for LRT feature. Very low fluctuation is likely noise, so
  // the maximum is used in that case.
  constexpr float kMaxLrt = 1.f;
  constexpr float kMinLrt = .2f;
  *prior_model_lrt =
      fluctuations_are_low
          ? kMaxLrt
          : std::min(kMaxLrt, std::max(kMinLrt, 1.2f * low_bins_mean));
}
|
||||
|
||||
} // namespace
|
||||
|
||||
PriorSignalModelEstimator::PriorSignalModelEstimator(float lrt_initial_value)
|
||||
: prior_model_(lrt_initial_value) {}
|
||||
|
||||
// Extract thresholds for feature parameters and computes the threshold/weights.
|
||||
void PriorSignalModelEstimator::Update(const Histograms& histograms) {
|
||||
bool low_lrt_fluctuations;
|
||||
UpdateLrt(histograms.get_lrt(), &prior_model_.lrt, &low_lrt_fluctuations);
|
||||
|
||||
// For spectral flatness and spectral difference: compute the main peaks of
|
||||
// the histograms.
|
||||
float spectral_flatness_peak_position;
|
||||
int spectral_flatness_peak_weight;
|
||||
FindFirstOfTwoLargestPeaks(
|
||||
kBinSizeSpecFlat, histograms.get_spectral_flatness(),
|
||||
&spectral_flatness_peak_position, &spectral_flatness_peak_weight);
|
||||
|
||||
float spectral_diff_peak_position = 0.f;
|
||||
int spectral_diff_peak_weight = 0;
|
||||
FindFirstOfTwoLargestPeaks(kBinSizeSpecDiff, histograms.get_spectral_diff(),
|
||||
&spectral_diff_peak_position,
|
||||
&spectral_diff_peak_weight);
|
||||
|
||||
// Reject if weight of peaks is not large enough, or peak value too small.
|
||||
// Peak limit for spectral flatness (varies between 0 and 1).
|
||||
const int use_spec_flat = spectral_flatness_peak_weight < 0.3f * 500 ||
|
||||
spectral_flatness_peak_position < 0.6f
|
||||
? 0
|
||||
: 1;
|
||||
|
||||
// Reject if weight of peaks is not large enough or if fluctuation of the LRT
|
||||
// feature are very low, indicating a noise state.
|
||||
const int use_spec_diff =
|
||||
spectral_diff_peak_weight < 0.3f * 500 || low_lrt_fluctuations ? 0 : 1;
|
||||
|
||||
// Update the model.
|
||||
prior_model_.template_diff_threshold = 1.2f * spectral_diff_peak_position;
|
||||
prior_model_.template_diff_threshold =
|
||||
std::min(1.f, std::max(0.16f, prior_model_.template_diff_threshold));
|
||||
|
||||
float one_by_feature_sum = 1.f / (1.f + use_spec_flat + use_spec_diff);
|
||||
prior_model_.lrt_weighting = one_by_feature_sum;
|
||||
|
||||
if (use_spec_flat == 1) {
|
||||
prior_model_.flatness_threshold = 0.9f * spectral_flatness_peak_position;
|
||||
prior_model_.flatness_threshold =
|
||||
std::min(.95f, std::max(0.1f, prior_model_.flatness_threshold));
|
||||
prior_model_.flatness_weighting = one_by_feature_sum;
|
||||
} else {
|
||||
prior_model_.flatness_weighting = 0.f;
|
||||
}
|
||||
|
||||
if (use_spec_diff == 1) {
|
||||
prior_model_.difference_weighting = one_by_feature_sum;
|
||||
} else {
|
||||
prior_model_.difference_weighting = 0.f;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
|
||||
|
||||
#include "modules/audio_processing/ns/histograms.h"
|
||||
#include "modules/audio_processing/ns/prior_signal_model.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Estimator of the prior signal model parameters.
|
||||
class PriorSignalModelEstimator {
|
||||
public:
|
||||
explicit PriorSignalModelEstimator(float lrt_initial_value);
|
||||
PriorSignalModelEstimator(const PriorSignalModelEstimator&) = delete;
|
||||
PriorSignalModelEstimator& operator=(const PriorSignalModelEstimator&) =
|
||||
delete;
|
||||
|
||||
// Updates the model estimate.
|
||||
void Update(const Histograms& h);
|
||||
|
||||
// Returns the estimated model.
|
||||
const PriorSignalModel& get_prior_model() const { return prior_model_; }
|
||||
|
||||
private:
|
||||
PriorSignalModel prior_model_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
|
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/quantile_noise_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Sets the starting values of the estimates and staggers the counters of the
// kSimult simultaneous estimates evenly up to kLongStartupPhaseBlocks.
QuantileNoiseEstimator::QuantileNoiseEstimator() {
  log_quantile_.fill(8.f);
  density_.fill(0.3f);
  quantile_.fill(0.f);

  // Estimate number n (counting from 1) starts at
  // floor(kLongStartupPhaseBlocks * n / kSimult).
  constexpr float kOneBySimult = 1.f / kSimult;
  size_t estimate = 0;
  while (estimate < kSimult) {
    counter_[estimate] =
        floor(kLongStartupPhaseBlocks * (estimate + 1.f) * kOneBySimult);
    ++estimate;
  }
}
|
||||
|
||||
// Updates the simultaneous log-quantile estimates with `signal_spectrum` and
// writes the current noise estimate to `noise_spectrum`.
//
// The returned estimate is refreshed from the last of the simultaneous
// estimates on each of the first kLongStartupPhaseBlocks calls, and afterwards
// whenever the counter of one of the estimates has reached
// kLongStartupPhaseBlocks. On all other calls the previously computed
// estimate is returned unchanged.
void QuantileNoiseEstimator::Estimate(
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
    rtc::ArrayView<float, kFftSizeBy2Plus1> noise_spectrum) {
  constexpr int kNumBins = static_cast<int>(kFftSizeBy2Plus1);
  // Half-width of the region around the quantile used for the density update.
  constexpr float kWidth = 0.01f;
  constexpr float kOneByWidthPlus2 = 1.f / (2.f * kWidth);

  std::array<float, kFftSizeBy2Plus1> log_spectrum;
  LogApproximation(signal_spectrum, log_spectrum);

  // Offset into log_quantile_ of the estimate to publish, or -1 for none.
  int quantile_index_to_return = -1;

  // Loop over simultaneous estimates.
  for (int s = 0; s < kSimult; ++s) {
    const int offset = s * kNumBins;
    auto& counter = counter_[s];
    const float one_by_counter_plus_1 = 1.f / (counter + 1.f);

    for (int bin = 0; bin < kNumBins; ++bin) {
      float& log_quantile = log_quantile_[offset + bin];
      float& density = density_[offset + bin];

      // Update log quantile estimate: step up by a quarter of the step size
      // when the observation is above the estimate, otherwise step down by
      // three quarters of it.
      float delta = 40.f;
      if (density > 1.f) {
        delta = 40.f / density;
      }
      const float multiplier = delta * one_by_counter_plus_1;
      if (log_spectrum[bin] > log_quantile) {
        log_quantile += 0.25f * multiplier;
      } else {
        log_quantile -= 0.75f * multiplier;
      }

      // Update density estimate when the observation is close to the
      // (already updated) quantile.
      if (fabs(log_spectrum[bin] - log_quantile) < kWidth) {
        density =
            (counter * density + kOneByWidthPlus2) * one_by_counter_plus_1;
      }
    }

    if (counter >= kLongStartupPhaseBlocks) {
      counter = 0;
      if (num_updates_ >= kLongStartupPhaseBlocks) {
        quantile_index_to_return = offset;
      }
    }
    ++counter;
  }

  // Sequentially update the noise during startup.
  if (num_updates_ < kLongStartupPhaseBlocks) {
    // Use the last "s" to get noise during startup that differ from zero.
    quantile_index_to_return =
        static_cast<int>(kFftSizeBy2Plus1 * (kSimult - 1));
    ++num_updates_;
  }

  if (quantile_index_to_return >= 0) {
    ExpApproximation(
        rtc::ArrayView<const float>(&log_quantile_[quantile_index_to_return],
                                    kFftSizeBy2Plus1),
        quantile_);
  }

  std::copy(quantile_.begin(), quantile_.end(), noise_spectrum.begin());
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
constexpr int kSimult = 3;
|
||||
|
||||
// For quantile noise estimation.
|
||||
class QuantileNoiseEstimator {
|
||||
public:
|
||||
QuantileNoiseEstimator();
|
||||
QuantileNoiseEstimator(const QuantileNoiseEstimator&) = delete;
|
||||
QuantileNoiseEstimator& operator=(const QuantileNoiseEstimator&) = delete;
|
||||
|
||||
// Estimate noise.
|
||||
void Estimate(rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
rtc::ArrayView<float, kFftSizeBy2Plus1> noise_spectrum);
|
||||
|
||||
private:
|
||||
std::array<float, kSimult * kFftSizeBy2Plus1> density_;
|
||||
std::array<float, kSimult * kFftSizeBy2Plus1> log_quantile_;
|
||||
std::array<float, kFftSizeBy2Plus1> quantile_;
|
||||
std::array<int, kSimult> counter_;
|
||||
int num_updates_ = 1;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
|
24
VocieProcess/modules/audio_processing/ns/signal_model.cc
Normal file
24
VocieProcess/modules/audio_processing/ns/signal_model.cc
Normal file
@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/signal_model.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Starts the LRT features at kLtrFeatureThr and both spectral features at 0.5.
SignalModel::SignalModel()
    : lrt(kLtrFeatureThr), spectral_diff(0.5f), spectral_flatness(0.5f) {
  avg_log_lrt.fill(kLtrFeatureThr);
}
|
||||
|
||||
} // namespace webrtc
|
34
VocieProcess/modules/audio_processing/ns/signal_model.h
Normal file
34
VocieProcess/modules/audio_processing/ns/signal_model.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
struct SignalModel {
|
||||
SignalModel();
|
||||
SignalModel(const SignalModel&) = delete;
|
||||
SignalModel& operator=(const SignalModel&) = delete;
|
||||
|
||||
float lrt;
|
||||
float spectral_diff;
|
||||
float spectral_flatness;
|
||||
// Log LRT factor with time-smoothing.
|
||||
std::array<float, kFftSizeBy2Plus1> avg_log_lrt;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
|
@ -0,0 +1,175 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/signal_model_estimator.h"
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
|
||||
|
||||
// Computes the difference measure between input spectrum and a template/learned
|
||||
// noise spectrum.
|
||||
float ComputeSpectralDiff(
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum,
|
||||
float diff_normalization) {
|
||||
// spectral_diff = var(signal_spectrum) - cov(signal_spectrum, magnAvgPause)^2
|
||||
// / var(magnAvgPause)
|
||||
|
||||
// Compute average quantities.
|
||||
float noise_average = 0.f;
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
// Conservative smooth noise spectrum from pause frames.
|
||||
noise_average += conservative_noise_spectrum[i];
|
||||
}
|
||||
noise_average = noise_average * kOneByFftSizeBy2Plus1;
|
||||
float signal_average = signal_spectral_sum * kOneByFftSizeBy2Plus1;
|
||||
|
||||
// Compute variance and covariance quantities.
|
||||
float covariance = 0.f;
|
||||
float noise_variance = 0.f;
|
||||
float signal_variance = 0.f;
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
float signal_diff = signal_spectrum[i] - signal_average;
|
||||
float noise_diff = conservative_noise_spectrum[i] - noise_average;
|
||||
covariance += signal_diff * noise_diff;
|
||||
noise_variance += noise_diff * noise_diff;
|
||||
signal_variance += signal_diff * signal_diff;
|
||||
}
|
||||
covariance *= kOneByFftSizeBy2Plus1;
|
||||
noise_variance *= kOneByFftSizeBy2Plus1;
|
||||
signal_variance *= kOneByFftSizeBy2Plus1;
|
||||
|
||||
// Update of average magnitude spectrum.
|
||||
float spectral_diff =
|
||||
signal_variance - (covariance * covariance) / (noise_variance + 0.0001f);
|
||||
// Normalize.
|
||||
return spectral_diff / (diff_normalization + 0.0001f);
|
||||
}
|
||||
|
||||
// Updates the spectral flatness based on the input spectrum.
|
||||
void UpdateSpectralFlatness(
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum,
|
||||
float* spectral_flatness) {
|
||||
RTC_DCHECK(spectral_flatness);
|
||||
|
||||
// Compute log of ratio of the geometric to arithmetic mean (handle the log(0)
|
||||
// separately).
|
||||
constexpr float kAveraging = 0.3f;
|
||||
float avg_spect_flatness_num = 0.f;
|
||||
for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) {
|
||||
if (signal_spectrum[i] == 0.f) {
|
||||
*spectral_flatness -= kAveraging * (*spectral_flatness);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) {
|
||||
avg_spect_flatness_num += LogApproximation(signal_spectrum[i]);
|
||||
}
|
||||
|
||||
float avg_spect_flatness_denom = signal_spectral_sum - signal_spectrum[0];
|
||||
|
||||
avg_spect_flatness_denom = avg_spect_flatness_denom * kOneByFftSizeBy2Plus1;
|
||||
avg_spect_flatness_num = avg_spect_flatness_num * kOneByFftSizeBy2Plus1;
|
||||
|
||||
float spectral_tmp =
|
||||
ExpApproximation(avg_spect_flatness_num) / avg_spect_flatness_denom;
|
||||
|
||||
// Time-avg update of spectral flatness feature.
|
||||
*spectral_flatness += kAveraging * (spectral_tmp - *spectral_flatness);
|
||||
}
|
||||
|
||||
// Updates the log LRT measures.
|
||||
void UpdateSpectralLrt(rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
|
||||
rtc::ArrayView<float, kFftSizeBy2Plus1> avg_log_lrt,
|
||||
float* lrt) {
|
||||
RTC_DCHECK(lrt);
|
||||
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
float tmp1 = 1.f + 2.f * prior_snr[i];
|
||||
float tmp2 = 2.f * prior_snr[i] / (tmp1 + 0.0001f);
|
||||
float bessel_tmp = (post_snr[i] + 1.f) * tmp2;
|
||||
avg_log_lrt[i] +=
|
||||
.5f * (bessel_tmp - LogApproximation(tmp1) - avg_log_lrt[i]);
|
||||
}
|
||||
|
||||
float log_lrt_time_avg_k_sum = 0.f;
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
log_lrt_time_avg_k_sum += avg_log_lrt[i];
|
||||
}
|
||||
*lrt = log_lrt_time_avg_k_sum * kOneByFftSizeBy2Plus1;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Constructs the prior model estimator with kLtrFeatureThr as its argument;
// all other members use their in-class initializers.
SignalModelEstimator::SignalModelEstimator()
    : prior_model_estimator_(kLtrFeatureThr) {
}
|
||||
|
||||
// Folds `signal_energy` into the spectral-difference normalization as a
// running average in which the previous value carries a weight of
// `num_analyzed_frames`.
void SignalModelEstimator::AdjustNormalization(int32_t num_analyzed_frames,
                                               float signal_energy) {
  float weighted_sum = diff_normalization_ * num_analyzed_frames;
  weighted_sum += signal_energy;
  diff_normalization_ = weighted_sum / (num_analyzed_frames + 1);
}
|
||||
|
||||
// Update the noise features.
|
||||
void SignalModelEstimator::Update(
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum,
|
||||
float signal_energy) {
|
||||
// Compute spectral flatness on input spectrum.
|
||||
UpdateSpectralFlatness(signal_spectrum, signal_spectral_sum,
|
||||
&features_.spectral_flatness);
|
||||
|
||||
// Compute difference of input spectrum with learned/estimated noise spectrum.
|
||||
float spectral_diff =
|
||||
ComputeSpectralDiff(conservative_noise_spectrum, signal_spectrum,
|
||||
signal_spectral_sum, diff_normalization_);
|
||||
// Compute time-avg update of difference feature.
|
||||
features_.spectral_diff += 0.3f * (spectral_diff - features_.spectral_diff);
|
||||
|
||||
signal_energy_sum_ += signal_energy;
|
||||
|
||||
// Compute histograms for parameter decisions (thresholds and weights for
|
||||
// features). Parameters are extracted periodically.
|
||||
if (--histogram_analysis_counter_ > 0) {
|
||||
histograms_.Update(features_);
|
||||
} else {
|
||||
// Compute model parameters.
|
||||
prior_model_estimator_.Update(histograms_);
|
||||
|
||||
// Clear histograms for next update.
|
||||
histograms_.Clear();
|
||||
|
||||
histogram_analysis_counter_ = kFeatureUpdateWindowSize;
|
||||
|
||||
// Update every window:
|
||||
// Compute normalization for the spectral difference for next estimation.
|
||||
signal_energy_sum_ = signal_energy_sum_ / kFeatureUpdateWindowSize;
|
||||
diff_normalization_ = 0.5f * (signal_energy_sum_ + diff_normalization_);
|
||||
signal_energy_sum_ = 0.f;
|
||||
}
|
||||
|
||||
// Compute the LRT.
|
||||
UpdateSpectralLrt(prior_snr, post_snr, features_.avg_log_lrt, &features_.lrt);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/ns/histograms.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
#include "modules/audio_processing/ns/prior_signal_model.h"
|
||||
#include "modules/audio_processing/ns/prior_signal_model_estimator.h"
|
||||
#include "modules/audio_processing/ns/signal_model.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class SignalModelEstimator {
|
||||
public:
|
||||
SignalModelEstimator();
|
||||
SignalModelEstimator(const SignalModelEstimator&) = delete;
|
||||
SignalModelEstimator& operator=(const SignalModelEstimator&) = delete;
|
||||
|
||||
// Compute signal normalization during the initial startup phase.
|
||||
void AdjustNormalization(int32_t num_analyzed_frames, float signal_energy);
|
||||
|
||||
void Update(
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum,
|
||||
float signal_energy);
|
||||
|
||||
const PriorSignalModel& get_prior_model() const {
|
||||
return prior_model_estimator_.get_prior_model();
|
||||
}
|
||||
const SignalModel& get_model() { return features_; }
|
||||
|
||||
private:
|
||||
float diff_normalization_ = 0.f;
|
||||
float signal_energy_sum_ = 0.f;
|
||||
Histograms histograms_;
|
||||
int histogram_analysis_counter_ = 500;
|
||||
PriorSignalModelEstimator prior_model_estimator_;
|
||||
SignalModel features_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
|
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/speech_probability_estimator.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Starts with a speech probability of zero in every bin.
SpeechProbabilityEstimator::SpeechProbabilityEstimator() {
  for (float& probability : speech_probability_) {
    probability = 0.f;
  }
}
|
||||
|
||||
void SpeechProbabilityEstimator::Update(
|
||||
int32_t num_analyzed_frames,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum,
|
||||
float signal_energy) {
|
||||
// Update models.
|
||||
if (num_analyzed_frames < kLongStartupPhaseBlocks) {
|
||||
signal_model_estimator_.AdjustNormalization(num_analyzed_frames,
|
||||
signal_energy);
|
||||
}
|
||||
signal_model_estimator_.Update(prior_snr, post_snr,
|
||||
conservative_noise_spectrum, signal_spectrum,
|
||||
signal_spectral_sum, signal_energy);
|
||||
|
||||
const SignalModel& model = signal_model_estimator_.get_model();
|
||||
const PriorSignalModel& prior_model =
|
||||
signal_model_estimator_.get_prior_model();
|
||||
|
||||
// Width parameter in sigmoid map for prior model.
|
||||
constexpr float kWidthPrior0 = 4.f;
|
||||
// Width for pause region: lower range, so increase width in tanh map.
|
||||
constexpr float kWidthPrior1 = 2.f * kWidthPrior0;
|
||||
|
||||
// Average LRT feature: use larger width in tanh map for pause regions.
|
||||
float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0;
|
||||
|
||||
// Compute indicator function: sigmoid map.
|
||||
float indicator0 =
|
||||
0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f);
|
||||
|
||||
// Spectral flatness feature: use larger width in tanh map for pause regions.
|
||||
width_prior = model.spectral_flatness > prior_model.flatness_threshold
|
||||
? kWidthPrior1
|
||||
: kWidthPrior0;
|
||||
|
||||
// Compute indicator function: sigmoid map.
|
||||
float indicator1 =
|
||||
0.5f * (tanh(1.f * width_prior *
|
||||
(prior_model.flatness_threshold - model.spectral_flatness)) +
|
||||
1.f);
|
||||
|
||||
// For template spectrum-difference : use larger width in tanh map for pause
|
||||
// regions.
|
||||
width_prior = model.spectral_diff < prior_model.template_diff_threshold
|
||||
? kWidthPrior1
|
||||
: kWidthPrior0;
|
||||
|
||||
// Compute indicator function: sigmoid map.
|
||||
float indicator2 =
|
||||
0.5f * (tanh(width_prior * (model.spectral_diff -
|
||||
prior_model.template_diff_threshold)) +
|
||||
1.f);
|
||||
|
||||
// Combine the indicator function with the feature weights.
|
||||
float ind_prior = prior_model.lrt_weighting * indicator0 +
|
||||
prior_model.flatness_weighting * indicator1 +
|
||||
prior_model.difference_weighting * indicator2;
|
||||
|
||||
// Compute the prior probability.
|
||||
prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_);
|
||||
|
||||
// Make sure probabilities are within range: keep floor to 0.01.
|
||||
prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f);
|
||||
|
||||
// Final speech probability: combine prior model with LR factor:.
|
||||
float gain_prior =
|
||||
(1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f);
|
||||
|
||||
std::array<float, kFftSizeBy2Plus1> inv_lrt;
|
||||
ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt);
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
#include "modules/audio_processing/ns/signal_model_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for estimating the probability of speech.
|
||||
class SpeechProbabilityEstimator {
|
||||
public:
|
||||
SpeechProbabilityEstimator();
|
||||
SpeechProbabilityEstimator(const SpeechProbabilityEstimator&) = delete;
|
||||
SpeechProbabilityEstimator& operator=(const SpeechProbabilityEstimator&) =
|
||||
delete;
|
||||
|
||||
// Compute speech probability.
|
||||
void Update(
|
||||
int32_t num_analyzed_frames,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
|
||||
float signal_spectral_sum,
|
||||
float signal_energy);
|
||||
|
||||
float get_prior_probability() const { return prior_speech_prob_; }
|
||||
rtc::ArrayView<const float> get_probability() { return speech_probability_; }
|
||||
|
||||
private:
|
||||
SignalModelEstimator signal_model_estimator_;
|
||||
float prior_speech_prob_ = .5f;
|
||||
std::array<float, kFftSizeBy2Plus1> speech_probability_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
|
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/suppression_params.h"
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Selects the over-subtraction factor, the minimum attenuating gain and
// whether attenuation adjustment is used, based on `suppression_level`.
SuppressionParams::SuppressionParams(
    NsConfig::SuppressionLevel suppression_level) {
  switch (suppression_level) {
    case NsConfig::SuppressionLevel::k6dB:
      over_subtraction_factor = 1.f;
      // 6 dB attenuation.
      minimum_attenuating_gain = 0.5f;
      use_attenuation_adjustment = false;
      break;
    case NsConfig::SuppressionLevel::k12dB:
      over_subtraction_factor = 1.f;
      // 12 dB attenuation.
      minimum_attenuating_gain = 0.25f;
      use_attenuation_adjustment = true;
      break;
    case NsConfig::SuppressionLevel::k18dB:
      over_subtraction_factor = 1.1f;
      // 18 dB attenuation.
      minimum_attenuating_gain = 0.125f;
      use_attenuation_adjustment = true;
      break;
    case NsConfig::SuppressionLevel::k21dB:
      over_subtraction_factor = 1.25f;
      // 20.9 dB attenuation.
      minimum_attenuating_gain = 0.09f;
      use_attenuation_adjustment = true;
      break;
    default:
      RTC_DCHECK_NOTREACHED();
      // Not expected to be reached for a valid level. If the check above does
      // not abort, fall back to the 12 dB parameters so that the members are
      // never left uninitialized.
      // NOTE(review): 12 dB was picked as a moderate fallback; confirm that it
      // matches the intended default level.
      over_subtraction_factor = 1.f;
      minimum_attenuating_gain = 0.25f;
      use_attenuation_adjustment = true;
      break;
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
|
||||
|
||||
#include "modules/audio_processing/ns/ns_config.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
struct SuppressionParams {
|
||||
explicit SuppressionParams(NsConfig::SuppressionLevel suppression_level);
|
||||
SuppressionParams(const SuppressionParams&) = delete;
|
||||
SuppressionParams& operator=(const SuppressionParams&) = delete;
|
||||
|
||||
float over_subtraction_factor;
|
||||
float minimum_attenuating_gain;
|
||||
bool use_attenuation_adjustment;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
|
121
VocieProcess/modules/audio_processing/ns/wiener_filter.cc
Normal file
121
VocieProcess/modules/audio_processing/ns/wiener_filter.cc
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/ns/wiener_filter.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/ns/fast_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores a reference to `suppression_params` and starts with a unity filter
// and zeroed spectral history.
WienerFilter::WienerFilter(const SuppressionParams& suppression_params)
    : suppression_params_(suppression_params) {
  spectrum_prev_process_.fill(0.f);
  initial_spectral_estimate_.fill(0.f);
  filter_.fill(1.f);
}
|
||||
|
||||
void WienerFilter::Update(
|
||||
int32_t num_analyzed_frames,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> parametric_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
// Previous estimate based on previous frame with gain filter.
|
||||
float prev_tsa = spectrum_prev_process_[i] /
|
||||
(prev_noise_spectrum[i] + 0.0001f) * filter_[i];
|
||||
|
||||
// Current estimate.
|
||||
float current_tsa;
|
||||
if (signal_spectrum[i] > noise_spectrum[i]) {
|
||||
current_tsa = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f;
|
||||
} else {
|
||||
current_tsa = 0.f;
|
||||
}
|
||||
|
||||
// Directed decision estimate is sum of two terms: current estimate and
|
||||
// previous estimate.
|
||||
float snr_prior = 0.98f * prev_tsa + (1.f - 0.98f) * current_tsa;
|
||||
filter_[i] =
|
||||
snr_prior / (suppression_params_.over_subtraction_factor + snr_prior);
|
||||
filter_[i] = std::max(std::min(filter_[i], 1.f),
|
||||
suppression_params_.minimum_attenuating_gain);
|
||||
}
|
||||
|
||||
if (num_analyzed_frames < kShortStartupPhaseBlocks) {
|
||||
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
|
||||
initial_spectral_estimate_[i] += signal_spectrum[i];
|
||||
float filter_initial = initial_spectral_estimate_[i] -
|
||||
suppression_params_.over_subtraction_factor *
|
||||
parametric_noise_spectrum[i];
|
||||
filter_initial /= initial_spectral_estimate_[i] + 0.0001f;
|
||||
|
||||
filter_initial = std::max(std::min(filter_initial, 1.f),
|
||||
suppression_params_.minimum_attenuating_gain);
|
||||
|
||||
// Weight the two suppression filters.
|
||||
constexpr float kOnyByShortStartupPhaseBlocks =
|
||||
1.f / kShortStartupPhaseBlocks;
|
||||
filter_initial *= kShortStartupPhaseBlocks - num_analyzed_frames;
|
||||
filter_[i] *= num_analyzed_frames;
|
||||
filter_[i] += filter_initial;
|
||||
filter_[i] *= kOnyByShortStartupPhaseBlocks;
|
||||
}
|
||||
}
|
||||
|
||||
std::copy(signal_spectrum.begin(), signal_spectrum.end(),
|
||||
spectrum_prev_process_.begin());
|
||||
}
|
||||
|
||||
float WienerFilter::ComputeOverallScalingFactor(
|
||||
int32_t num_analyzed_frames,
|
||||
float prior_speech_probability,
|
||||
float energy_before_filtering,
|
||||
float energy_after_filtering) const {
|
||||
if (!suppression_params_.use_attenuation_adjustment ||
|
||||
num_analyzed_frames <= kLongStartupPhaseBlocks) {
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
float gain = SqrtFastApproximation(energy_after_filtering /
|
||||
(energy_before_filtering + 1.f));
|
||||
|
||||
// Scaling for new version. Threshold in final energy gain factor calculation.
|
||||
constexpr float kBLim = 0.5f;
|
||||
float scale_factor1 = 1.f;
|
||||
if (gain > kBLim) {
|
||||
scale_factor1 = 1.f + 1.3f * (gain - kBLim);
|
||||
if (gain * scale_factor1 > 1.f) {
|
||||
scale_factor1 = 1.f / gain;
|
||||
}
|
||||
}
|
||||
|
||||
float scale_factor2 = 1.f;
|
||||
if (gain < kBLim) {
|
||||
// Do not reduce scale too much for pause regions: attenuation here should
|
||||
// be controlled by flooring.
|
||||
gain = std::max(gain, suppression_params_.minimum_attenuating_gain);
|
||||
scale_factor2 = 1.f - 0.3f * (kBLim - gain);
|
||||
}
|
||||
|
||||
// Combine both scales with speech/noise prob: note prior
|
||||
// (prior_speech_probability) is not frequency dependent.
|
||||
return prior_speech_probability * scale_factor1 +
|
||||
(1.f - prior_speech_probability) * scale_factor2;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
57
VocieProcess/modules/audio_processing/ns/wiener_filter.h
Normal file
57
VocieProcess/modules/audio_processing/ns/wiener_filter.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/ns/ns_common.h"
|
||||
#include "modules/audio_processing/ns/suppression_params.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Estimates a Wiener-filter based frequency domain noise reduction filter.
|
||||
class WienerFilter {
|
||||
public:
|
||||
explicit WienerFilter(const SuppressionParams& suppression_params);
|
||||
WienerFilter(const WienerFilter&) = delete;
|
||||
WienerFilter& operator=(const WienerFilter&) = delete;
|
||||
|
||||
// Updates the filter estimate.
|
||||
void Update(
|
||||
int32_t num_analyzed_frames,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> parametric_noise_spectrum,
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum);
|
||||
|
||||
// Compute an overall gain scaling factor.
|
||||
float ComputeOverallScalingFactor(int32_t num_analyzed_frames,
|
||||
float prior_speech_probability,
|
||||
float energy_before_filtering,
|
||||
float energy_after_filtering) const;
|
||||
|
||||
// Returns the filter.
|
||||
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_filter() const {
|
||||
return filter_;
|
||||
}
|
||||
|
||||
private:
|
||||
const SuppressionParams& suppression_params_;
|
||||
std::array<float, kFftSizeBy2Plus1> spectrum_prev_process_;
|
||||
std::array<float, kFftSizeBy2Plus1> initial_spectral_estimate_;
|
||||
std::array<float, kFftSizeBy2Plus1> filter_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
|
Loading…
Reference in New Issue
Block a user