FaceAccess/VocieProcess/modules/audio_processing/aec3/stationarity_estimator.cc

242 lines
8.3 KiB
C++
Raw Normal View History

2024-09-05 09:59:28 +08:00
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/stationarity_estimator.h"
#include <algorithm>
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
namespace webrtc {
namespace {

// Floor for the per-band noise power: the value the noise spectrum is reset
// to and the lower clamp applied when the estimate decays.
constexpr float kMinNoisePower = 10.f;

// Value loaded into a band's hangover counter whenever that band is flagged
// as non-stationary.
constexpr int kHangoverBlocks = kNumBlocksPerSecond / 20;

// Number of initial blocks during which the noise spectrum is built by
// accumulating scaled input spectra instead of by smoothing.
constexpr int kNBlocksAverageInitPhase = 20;

// Length, in blocks, of the phase over which the smoothing factor is ramped
// from its initial to its final value (see NoiseSpectrum::GetAlpha()).
constexpr int kNBlocksInitialPhase =
    static_cast<int>(kNumBlocksPerSecond * 2.);

}  // namespace
// Creates the estimator with its own data dumper, numbered by the
// pre-incremented global instance counter, and puts all state into its
// reset condition.
StationarityEstimator::StationarityEstimator()
    : data_dumper_(new ApmDataDumper(++instance_count_)) {
  Reset();
}
// Out-of-line defaulted destructor.
StationarityEstimator::~StationarityEstimator() = default;
// Restores the initial state: every band flagged as non-stationary, all
// hangover counters at zero and the noise spectrum estimator reset.
void StationarityEstimator::Reset() {
  stationarity_flags_.fill(false);
  hangovers_.fill(0);
  noise_.Reset();
}
// Updates only the noise estimator. Useful until the delay is known.
// After the update, the resulting noise spectrum and the current block
// stationarity decision are written to the data dumper.
void StationarityEstimator::UpdateNoiseEstimator(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> spectrum) {
  noise_.Update(spectrum);

  data_dumper_->DumpRaw("aec3_stationarity_noise_spectrum", noise_.Spectrum());

  const bool block_is_stationary = IsBlockStationary();
  data_dumper_->DumpRaw("aec3_stationarity_is_block_stationary",
                        block_is_stationary);
}
void StationarityEstimator::UpdateStationarityFlags(
const SpectrumBuffer& spectrum_buffer,
rtc::ArrayView<const float> render_reverb_contribution_spectrum,
int idx_current,
int num_lookahead) {
std::array<int, kWindowLength> indexes;
int num_lookahead_bounded = std::min(num_lookahead, kWindowLength - 1);
int idx = idx_current;
if (num_lookahead_bounded < kWindowLength - 1) {
int num_lookback = (kWindowLength - 1) - num_lookahead_bounded;
idx = spectrum_buffer.OffsetIndex(idx_current, num_lookback);
}
// For estimating the stationarity properties of the current frame, the
// power for each band is accumulated for several consecutive spectra in the
// method EstimateBandStationarity.
// In order to avoid getting the indexes of the spectra for every band with
// its associated overhead, those indexes are stored in an array and then use
// when the estimation is done.
indexes[0] = idx;
for (size_t k = 1; k < indexes.size(); ++k) {
indexes[k] = spectrum_buffer.DecIndex(indexes[k - 1]);
}
RTC_DCHECK_EQ(
spectrum_buffer.DecIndex(indexes[kWindowLength - 1]),
spectrum_buffer.OffsetIndex(idx_current, -(num_lookahead_bounded + 1)));
for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
stationarity_flags_[k] = EstimateBandStationarity(
spectrum_buffer, render_reverb_contribution_spectrum, indexes, k);
}
UpdateHangover();
SmoothStationaryPerFreq();
}
// Returns true when more than 75% of the bands are reported as stationary by
// IsBandStationary().
bool StationarityEstimator::IsBlockStationary() const {
  RTC_DCHECK_EQ(stationarity_flags_.size(), kFftLengthBy2Plus1);

  // Counted in float so that the fraction below is computed in float.
  float num_stationary_bands = 0.f;
  for (size_t band = 0; band < stationarity_flags_.size(); ++band) {
    num_stationary_bands += IsBandStationary(band) ? 1.f : 0.f;
  }
  return (num_stationary_bands * (1.f / kFftLengthBy2Plus1)) > 0.75f;
}
// Decides whether `band` is stationary over the window of spectra selected by
// `indexes`.
//
// The channel-averaged power of the band is summed over all window spectra
// and `average_reverb[band]` is added on top. The band is stationary when
// that total is below kThrStationarity times the reference noise level, where
// the reference is GetStationarityPowerBand(band) scaled by kWindowLength.
// The ratio of the total to the reference is written to the data dumper.
bool StationarityEstimator::EstimateBandStationarity(
    const SpectrumBuffer& spectrum_buffer,
    rtc::ArrayView<const float> average_reverb,
    const std::array<int, kWindowLength>& indexes,
    size_t band) const {
  constexpr float kThrStationarity = 10.f;

  // The channel count is taken from the first buffer entry.
  const int num_channels = static_cast<int>(spectrum_buffer.buffer[0].size());
  const float channel_weight = 1.f / num_channels;

  float window_power = 0.f;
  for (const int spectrum_idx : indexes) {
    for (int channel = 0; channel < num_channels; ++channel) {
      window_power +=
          spectrum_buffer.buffer[spectrum_idx][channel][band] * channel_weight;
    }
  }
  window_power += average_reverb[band];

  const float noise_reference = kWindowLength * GetStationarityPowerBand(band);
  RTC_CHECK_LT(0.f, noise_reference);

  const bool is_stationary = window_power < kThrStationarity * noise_reference;
  data_dumper_->DumpRaw("aec3_stationarity_long_ratio",
                        window_power / noise_reference);
  return is_stationary;
}
// Returns true only if every entry of stationarity_flags_ is set.
bool StationarityEstimator::AreAllBandsStationary() {
  return std::all_of(stationarity_flags_.begin(), stationarity_flags_.end(),
                     [](bool is_stationary) { return is_stationary; });
}
void StationarityEstimator::UpdateHangover() {
bool reduce_hangover = AreAllBandsStationary();
for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
if (!stationarity_flags_[k]) {
hangovers_[k] = kHangoverBlocks;
} else if (reduce_hangover) {
hangovers_[k] = std::max(hangovers_[k] - 1, 0);
}
}
}
void StationarityEstimator::SmoothStationaryPerFreq() {
std::array<bool, kFftLengthBy2Plus1> all_ahead_stationary_smooth;
for (size_t k = 1; k < kFftLengthBy2Plus1 - 1; ++k) {
all_ahead_stationary_smooth[k] = stationarity_flags_[k - 1] &&
stationarity_flags_[k] &&
stationarity_flags_[k + 1];
}
all_ahead_stationary_smooth[0] = all_ahead_stationary_smooth[1];
all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 1] =
all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 2];
stationarity_flags_ = all_ahead_stationary_smooth;
}
// Definition of the static instance counter, starting at zero. The
// constructor increments it to number each instance's data dumper.
std::atomic<int> StationarityEstimator::instance_count_(0);
// Constructs the noise spectrum estimator in its reset state.
StationarityEstimator::NoiseSpectrum::NoiseSpectrum() {
  Reset();
}
// Out-of-line defaulted destructor.
StationarityEstimator::NoiseSpectrum::~NoiseSpectrum() = default;
// Sets every band of the noise spectrum to kMinNoisePower and restarts the
// block counter from zero.
void StationarityEstimator::NoiseSpectrum::Reset() {
  noise_spectrum_.fill(kMinNoisePower);
  block_counter_ = 0;
}
// Updates the noise spectrum estimate with one block of render spectra.
//
// `spectrum` holds one power spectrum per render channel and must contain at
// least one channel. With several channels, the spectra are averaged over the
// channels for every bin (including bin 0) before being used.
//
// For the first kNBlocksAverageInitPhase blocks after a reset, the scaled
// input is accumulated onto the estimate; afterwards each band is updated via
// UpdateBandBySmoothing() using the smoothing factor from GetAlpha().
void StationarityEstimator::NoiseSpectrum::Update(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> spectrum) {
  // Check the number of channels: spectrum[0] is accessed below.
  RTC_DCHECK_LE(1, spectrum.size());
  const int num_render_channels = static_cast<int>(spectrum.size());

  std::array<float, kFftLengthBy2Plus1> avg_spectrum_data;
  rtc::ArrayView<const float> avg_spectrum;
  if (num_render_channels == 1) {
    // Single channel: use the input directly, no copy needed.
    avg_spectrum = spectrum[0];
  } else {
    // For multiple channels, average the channel spectra before passing to the
    // noise spectrum estimator. All bins are averaged so that bin 0 is
    // treated the same way as every other bin.
    avg_spectrum = avg_spectrum_data;
    std::copy(spectrum[0].begin(), spectrum[0].end(),
              avg_spectrum_data.begin());
    for (int ch = 1; ch < num_render_channels; ++ch) {
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        avg_spectrum_data[k] += spectrum[ch][k];
      }
    }

    const float one_by_num_channels = 1.f / num_render_channels;
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      avg_spectrum_data[k] *= one_by_num_channels;
    }
  }

  ++block_counter_;
  if (block_counter_ <= kNBlocksAverageInitPhase) {
    // Initial phase: accumulate the scaled input onto the estimate.
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      noise_spectrum_[k] += (1.f / kNBlocksAverageInitPhase) * avg_spectrum[k];
    }
  } else {
    // The smoothing factor is the same for all bands of this block.
    const float alpha = GetAlpha();
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      noise_spectrum_[k] =
          UpdateBandBySmoothing(avg_spectrum[k], noise_spectrum_[k], alpha);
    }
  }
}
// Returns the smoothing factor for the current block count. The factor
// equals kInitialAlpha when block_counter_ is kNBlocksAverageInitPhase,
// decreases linearly over the following kNBlocksInitialPhase blocks, and is
// kFinalAlpha afterwards.
// NOTE(review): for block_counter_ below kNBlocksAverageInitPhase the result
// falls outside the [kFinalAlpha, kInitialAlpha] range; Update() does not use
// the factor in that phase.
float StationarityEstimator::NoiseSpectrum::GetAlpha() const {
  constexpr float kFinalAlpha = 0.004f;
  constexpr float kInitialAlpha = 0.04f;
  constexpr float kAlphaSlope =
      (kInitialAlpha - kFinalAlpha) / kNBlocksInitialPhase;

  if (block_counter_ > (kNBlocksInitialPhase + kNBlocksAverageInitPhase)) {
    return kFinalAlpha;
  }
  return kInitialAlpha -
         kAlphaSlope * (block_counter_ - kNBlocksAverageInitPhase);
}
// Returns the updated noise power for one band.
//
// `power_band` is the new input power, `power_band_noise` the current noise
// estimate and `alpha` the smoothing factor.
// - When the input is not above the estimate, the estimate moves towards the
//   input with factor `alpha` and is clamped from below at kMinNoisePower.
// - When the input is above the estimate, the factor is scaled by the ratio
//   estimate/input, and additionally by 0.1 once block_counter_ exceeds
//   kNBlocksInitialPhase and the input is more than 10 times the estimate.
float StationarityEstimator::NoiseSpectrum::UpdateBandBySmoothing(
    float power_band,
    float power_band_noise,
    float alpha) const {
  const float delta = power_band - power_band_noise;

  if (!(power_band_noise < power_band)) {
    // Downward (or no) move: plain smoothing with a lower bound.
    const float decayed = power_band_noise + alpha * delta;
    return std::max(decayed, kMinNoisePower);
  }

  // Upward move: slow the adaptation according to how far the input exceeds
  // the current estimate.
  RTC_DCHECK_GT(power_band, 0.f);
  float rise_alpha = alpha * (power_band_noise / power_band);
  if (block_counter_ > kNBlocksInitialPhase &&
      10.f * power_band_noise < power_band) {
    rise_alpha *= 0.1f;
  }
  return power_band_noise + rise_alpha * delta;
}
} // namespace webrtc