update code.
This commit is contained in:
parent
4f3dc015f7
commit
ccf69909d6
@ -8,6 +8,7 @@ add_executable(Record main.cpp
|
||||
Player.cpp
|
||||
Recorder.cpp
|
||||
SpeexDsp.h SpeexDsp.cpp
|
||||
Utility.h Utility.cpp
|
||||
WebRTCPublisher.h WebRTCPublisher.cpp
|
||||
)
|
||||
|
||||
|
12
Record/Utility.cpp
Normal file
12
Record/Utility.cpp
Normal file
@ -0,0 +1,12 @@
|
||||
#include "Utility.h"
|
||||
|
||||
std::vector<uint8_t> duplicate(const uint8_t *data, int32_t byteSize) {
|
||||
std::vector<uint8_t> ret(byteSize * 2);
|
||||
auto pcm = reinterpret_cast<const uint16_t *>(data);
|
||||
auto retPcm = reinterpret_cast<uint16_t *>(ret.data());
|
||||
for (int i = 0; i < byteSize / 2; i++) {
|
||||
retPcm[2 * i] = pcm[i];
|
||||
retPcm[2 * i + 1] = pcm[i];
|
||||
}
|
||||
return ret;
|
||||
}
|
11
Record/Utility.h
Normal file
11
Record/Utility.h
Normal file
@ -0,0 +1,11 @@
|
||||
#ifndef __UTILITY_H__
|
||||
#define __UTILITY_H__
|
||||
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
std::vector<uint8_t> split();
|
||||
std::vector<uint8_t> duplicate(const uint8_t *data, int32_t byteSize);
|
||||
|
||||
|
||||
#endif // __UTILITY_H__
|
139
VocieProcess/CMakeLists.txt
Normal file
139
VocieProcess/CMakeLists.txt
Normal file
@ -0,0 +1,139 @@
|
||||
cmake_minimum_required(VERSION 3.29)
|
||||
|
||||
project(VocieProcess)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
set(ABSL_PROPAGATE_CXX_STD ON)
|
||||
FetchContent_Declare(absl
|
||||
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
|
||||
)
|
||||
FetchContent_MakeAvailable(absl)
|
||||
|
||||
|
||||
add_library(VocieProcess
|
||||
api/audio/audio_processing_statistics.h api/audio/audio_processing_statistics.cc
|
||||
api/audio/audio_processing.h api/audio/audio_processing.cc
|
||||
api/audio/channel_layout.h api/audio/channel_layout.cc
|
||||
api/audio/echo_canceller3_config.h api/audio/echo_canceller3_config.cc
|
||||
|
||||
api/task_queue/task_queue_base.h api/task_queue/task_queue_base.cc
|
||||
|
||||
api/units/time_delta.h api/units/time_delta.cc
|
||||
api/units/timestamp.h api/units/timestamp.cc
|
||||
|
||||
common_audio/channel_buffer.h common_audio/channel_buffer.cc
|
||||
|
||||
common_audio/resampler/push_sinc_resampler.h common_audio/resampler/push_sinc_resampler.cc
|
||||
common_audio/resampler/sinc_resampler.h common_audio/resampler/sinc_resampler.cc
|
||||
|
||||
common_audio/signal_processing/dot_product_with_scale.h common_audio/signal_processing/dot_product_with_scale.cc
|
||||
|
||||
common_audio/third_party/ooura/fft_size_128/ooura_fft.h common_audio/third_party/ooura/fft_size_128/ooura_fft.cc
|
||||
common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
|
||||
|
||||
rtc_base/checks.h rtc_base/checks.cc
|
||||
rtc_base/logging.h rtc_base/logging.cc
|
||||
rtc_base/platform_thread_types.h rtc_base/platform_thread_types.cc
|
||||
rtc_base/race_checker.h rtc_base/race_checker.cc
|
||||
rtc_base/string_encode.h rtc_base/string_encode.cc
|
||||
rtc_base/string_to_number.h rtc_base/string_to_number.cc
|
||||
rtc_base/string_utils.h rtc_base/string_utils.cc
|
||||
rtc_base/system_time.h rtc_base/system_time.cc
|
||||
rtc_base/time_utils.h rtc_base/time_utils.cc
|
||||
rtc_base/win32.h rtc_base/win32.cc
|
||||
|
||||
rtc_base/containers/flat_tree.h rtc_base/containers/flat_tree.cc
|
||||
|
||||
rtc_base/experiments/field_trial_parser.h rtc_base/experiments/field_trial_parser.cc
|
||||
|
||||
rtc_base/memory/aligned_malloc.h rtc_base/memory/aligned_malloc.cc
|
||||
|
||||
rtc_base/strings/string_builder.h rtc_base/strings/string_builder.cc
|
||||
|
||||
modules/audio_processing/audio_buffer.h modules/audio_processing/audio_buffer.cc
|
||||
modules/audio_processing/high_pass_filter.h modules/audio_processing/high_pass_filter.cc
|
||||
modules/audio_processing/splitting_filter.h modules/audio_processing/splitting_filter.cc
|
||||
modules/audio_processing/three_band_filter_bank.h modules/audio_processing/three_band_filter_bank.cc
|
||||
|
||||
modules/audio_processing/aec3/adaptive_fir_filter_erl.h modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
|
||||
modules/audio_processing/aec3/adaptive_fir_filter.h modules/audio_processing/aec3/adaptive_fir_filter.cc
|
||||
modules/audio_processing/aec3/aec_state.h modules/audio_processing/aec3/aec_state.cc
|
||||
modules/audio_processing/aec3/aec3_common.h modules/audio_processing/aec3/aec3_common.cc
|
||||
modules/audio_processing/aec3/aec3_fft.h modules/audio_processing/aec3/aec3_fft.cc
|
||||
modules/audio_processing/aec3/alignment_mixer.h modules/audio_processing/aec3/alignment_mixer.cc
|
||||
modules/audio_processing/aec3/api_call_jitter_metrics.h modules/audio_processing/aec3/api_call_jitter_metrics.cc
|
||||
modules/audio_processing/aec3/block_buffer.h modules/audio_processing/aec3/block_buffer.cc
|
||||
modules/audio_processing/aec3/block_delay_buffer.h modules/audio_processing/aec3/block_delay_buffer.cc
|
||||
modules/audio_processing/aec3/block_framer.h modules/audio_processing/aec3/block_framer.cc
|
||||
modules/audio_processing/aec3/block_processor_metrics.h modules/audio_processing/aec3/block_processor_metrics.cc
|
||||
modules/audio_processing/aec3/block_processor.h modules/audio_processing/aec3/block_processor.cc
|
||||
modules/audio_processing/aec3/clockdrift_detector.h modules/audio_processing/aec3/clockdrift_detector.cc
|
||||
modules/audio_processing/aec3/coarse_filter_update_gain.h modules/audio_processing/aec3/coarse_filter_update_gain.cc
|
||||
modules/audio_processing/aec3/comfort_noise_generator.h modules/audio_processing/aec3/comfort_noise_generator.cc
|
||||
modules/audio_processing/aec3/config_selector.h modules/audio_processing/aec3/config_selector.cc
|
||||
modules/audio_processing/aec3/decimator.h modules/audio_processing/aec3/decimator.cc
|
||||
modules/audio_processing/aec3/dominant_nearend_detector.h modules/audio_processing/aec3/dominant_nearend_detector.cc
|
||||
modules/audio_processing/aec3/downsampled_render_buffer.h modules/audio_processing/aec3/downsampled_render_buffer.cc
|
||||
modules/audio_processing/aec3/echo_audibility.h modules/audio_processing/aec3/echo_audibility.cc
|
||||
modules/audio_processing/aec3/echo_canceller3.h modules/audio_processing/aec3/echo_canceller3.cc
|
||||
modules/audio_processing/aec3/echo_path_delay_estimator.h modules/audio_processing/aec3/echo_path_delay_estimator.cc
|
||||
modules/audio_processing/aec3/echo_path_variability.h modules/audio_processing/aec3/echo_path_variability.cc
|
||||
modules/audio_processing/aec3/echo_remover_metrics.h modules/audio_processing/aec3/echo_remover_metrics.cc
|
||||
modules/audio_processing/aec3/echo_remover.h modules/audio_processing/aec3/echo_remover.cc
|
||||
modules/audio_processing/aec3/erl_estimator.h modules/audio_processing/aec3/erl_estimator.cc
|
||||
modules/audio_processing/aec3/erle_estimator.h modules/audio_processing/aec3/erle_estimator.cc
|
||||
modules/audio_processing/aec3/fft_buffer.h modules/audio_processing/aec3/fft_buffer.cc
|
||||
modules/audio_processing/aec3/filter_analyzer.h modules/audio_processing/aec3/filter_analyzer.cc
|
||||
modules/audio_processing/aec3/frame_blocker.h modules/audio_processing/aec3/frame_blocker.cc
|
||||
modules/audio_processing/aec3/fullband_erle_estimator.h modules/audio_processing/aec3/fullband_erle_estimator.cc
|
||||
modules/audio_processing/aec3/matched_filter_lag_aggregator.h modules/audio_processing/aec3/matched_filter_lag_aggregator.cc
|
||||
modules/audio_processing/aec3/matched_filter.h modules/audio_processing/aec3/matched_filter.cc
|
||||
modules/audio_processing/aec3/moving_average.h modules/audio_processing/aec3/moving_average.cc
|
||||
modules/audio_processing/aec3/multi_channel_content_detector.h modules/audio_processing/aec3/multi_channel_content_detector.cc
|
||||
modules/audio_processing/aec3/refined_filter_update_gain.h modules/audio_processing/aec3/refined_filter_update_gain.cc
|
||||
modules/audio_processing/aec3/render_buffer.h modules/audio_processing/aec3/render_buffer.cc
|
||||
modules/audio_processing/aec3/render_delay_buffer.h modules/audio_processing/aec3/render_delay_buffer.cc
|
||||
modules/audio_processing/aec3/render_delay_controller_metrics.h modules/audio_processing/aec3/render_delay_controller_metrics.cc
|
||||
modules/audio_processing/aec3/render_delay_controller.h modules/audio_processing/aec3/render_delay_controller.cc
|
||||
modules/audio_processing/aec3/render_signal_analyzer.h modules/audio_processing/aec3/render_signal_analyzer.cc
|
||||
modules/audio_processing/aec3/residual_echo_estimator.h modules/audio_processing/aec3/residual_echo_estimator.cc
|
||||
modules/audio_processing/aec3/reverb_decay_estimator.h modules/audio_processing/aec3/reverb_decay_estimator.cc
|
||||
modules/audio_processing/aec3/reverb_frequency_response.h modules/audio_processing/aec3/reverb_frequency_response.cc
|
||||
modules/audio_processing/aec3/reverb_model_estimator.h modules/audio_processing/aec3/reverb_model_estimator.cc
|
||||
modules/audio_processing/aec3/reverb_model.h modules/audio_processing/aec3/reverb_model.cc
|
||||
modules/audio_processing/aec3/signal_dependent_erle_estimator.h modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
|
||||
modules/audio_processing/aec3/spectrum_buffer.h modules/audio_processing/aec3/spectrum_buffer.cc
|
||||
modules/audio_processing/aec3/stationarity_estimator.h modules/audio_processing/aec3/stationarity_estimator.cc
|
||||
modules/audio_processing/aec3/subband_erle_estimator.h modules/audio_processing/aec3/subband_erle_estimator.cc
|
||||
modules/audio_processing/aec3/subband_nearend_detector.h modules/audio_processing/aec3/subband_nearend_detector.cc
|
||||
modules/audio_processing/aec3/subtractor_output_analyzer.h modules/audio_processing/aec3/subtractor_output_analyzer.cc
|
||||
modules/audio_processing/aec3/subtractor_output.h modules/audio_processing/aec3/subtractor_output.cc
|
||||
modules/audio_processing/aec3/subtractor.h modules/audio_processing/aec3/subtractor.cc
|
||||
modules/audio_processing/aec3/suppression_filter.h modules/audio_processing/aec3/suppression_filter.cc
|
||||
modules/audio_processing/aec3/suppression_gain.h modules/audio_processing/aec3/suppression_gain.cc
|
||||
modules/audio_processing/aec3/transparent_mode.h modules/audio_processing/aec3/transparent_mode.cc
|
||||
|
||||
modules/audio_processing/logging/apm_data_dumper.h modules/audio_processing/logging/apm_data_dumper.cc
|
||||
|
||||
modules/audio_processing/utility/cascaded_biquad_filter.h modules/audio_processing/utility/cascaded_biquad_filter.cc
|
||||
)
|
||||
|
||||
target_compile_definitions(VocieProcess
|
||||
PRIVATE WEBRTC_WIN
|
||||
PRIVATE NOMINMAX # <windows.h>
|
||||
PRIVATE RTC_DISABLE_LOGGING
|
||||
PRIVATE RTC_METRICS_ENABLED=0
|
||||
PRIVATE WEBRTC_APM_DEBUG_DUMP=0
|
||||
)
|
||||
|
||||
target_include_directories(VocieProcess
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
target_link_libraries(VocieProcess
|
||||
PRIVATE absl::optional
|
||||
)
|
335
VocieProcess/api/array_view.h
Normal file
335
VocieProcess/api/array_view.h
Normal file
@ -0,0 +1,335 @@
|
||||
/*
|
||||
* Copyright 2015 The WebRTC Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_ARRAY_VIEW_H_
|
||||
#define API_ARRAY_VIEW_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <iterator>
|
||||
#include <type_traits>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/type_traits.h"
|
||||
|
||||
namespace rtc {
|
||||
|
||||
// tl;dr: rtc::ArrayView is the same thing as gsl::span from the Guideline
|
||||
// Support Library.
|
||||
//
|
||||
// Many functions read from or write to arrays. The obvious way to do this is
|
||||
// to use two arguments, a pointer to the first element and an element count:
|
||||
//
|
||||
// bool Contains17(const int* arr, size_t size) {
|
||||
// for (size_t i = 0; i < size; ++i) {
|
||||
// if (arr[i] == 17)
|
||||
// return true;
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// This is flexible, since it doesn't matter how the array is stored (C array,
|
||||
// std::vector, rtc::Buffer, ...), but it's error-prone because the caller has
|
||||
// to correctly specify the array length:
|
||||
//
|
||||
// Contains17(arr, arraysize(arr)); // C array
|
||||
// Contains17(arr.data(), arr.size()); // std::vector
|
||||
// Contains17(arr, size); // pointer + size
|
||||
// ...
|
||||
//
|
||||
// It's also kind of messy to have two separate arguments for what is
|
||||
// conceptually a single thing.
|
||||
//
|
||||
// Enter rtc::ArrayView<T>. It contains a T pointer (to an array it doesn't
|
||||
// own) and a count, and supports the basic things you'd expect, such as
|
||||
// indexing and iteration. It allows us to write our function like this:
|
||||
//
|
||||
// bool Contains17(rtc::ArrayView<const int> arr) {
|
||||
// for (auto e : arr) {
|
||||
// if (e == 17)
|
||||
// return true;
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// And even better, because a bunch of things will implicitly convert to
|
||||
// ArrayView, we can call it like this:
|
||||
//
|
||||
// Contains17(arr); // C array
|
||||
// Contains17(arr); // std::vector
|
||||
// Contains17(rtc::ArrayView<int>(arr, size)); // pointer + size
|
||||
// Contains17(nullptr); // nullptr -> empty ArrayView
|
||||
// ...
|
||||
//
|
||||
// ArrayView<T> stores both a pointer and a size, but you may also use
|
||||
// ArrayView<T, N>, which has a size that's fixed at compile time (which means
|
||||
// it only has to store the pointer).
|
||||
//
|
||||
// One important point is that ArrayView<T> and ArrayView<const T> are
|
||||
// different types, which allow and don't allow mutation of the array elements,
|
||||
// respectively. The implicit conversions work just like you'd hope, so that
|
||||
// e.g. vector<int> will convert to either ArrayView<int> or ArrayView<const
|
||||
// int>, but const vector<int> will convert only to ArrayView<const int>.
|
||||
// (ArrayView itself can be the source type in such conversions, so
|
||||
// ArrayView<int> will convert to ArrayView<const int>.)
|
||||
//
|
||||
// Note: ArrayView is tiny (just a pointer and a count if variable-sized, just
|
||||
// a pointer if fix-sized) and trivially copyable, so it's probably cheaper to
|
||||
// pass it by value than by const reference.
|
||||
|
||||
namespace array_view_internal {
|
||||
|
||||
// Magic constant for indicating that the size of an ArrayView is variable
|
||||
// instead of fixed.
|
||||
enum : std::ptrdiff_t { kArrayViewVarSize = -4711 };
|
||||
|
||||
// Base class for ArrayViews of fixed nonzero size.
|
||||
template <typename T, std::ptrdiff_t Size>
|
||||
class ArrayViewBase {
|
||||
static_assert(Size > 0, "ArrayView size must be variable or non-negative");
|
||||
|
||||
public:
|
||||
ArrayViewBase(T* data, size_t size) : data_(data) {}
|
||||
|
||||
static constexpr size_t size() { return Size; }
|
||||
static constexpr bool empty() { return false; }
|
||||
T* data() const { return data_; }
|
||||
|
||||
protected:
|
||||
static constexpr bool fixed_size() { return true; }
|
||||
|
||||
private:
|
||||
T* data_;
|
||||
};
|
||||
|
||||
// Specialized base class for ArrayViews of fixed zero size.
|
||||
template <typename T>
|
||||
class ArrayViewBase<T, 0> {
|
||||
public:
|
||||
explicit ArrayViewBase(T* data, size_t size) {}
|
||||
|
||||
static constexpr size_t size() { return 0; }
|
||||
static constexpr bool empty() { return true; }
|
||||
T* data() const { return nullptr; }
|
||||
|
||||
protected:
|
||||
static constexpr bool fixed_size() { return true; }
|
||||
};
|
||||
|
||||
// Specialized base class for ArrayViews of variable size.
|
||||
template <typename T>
|
||||
class ArrayViewBase<T, array_view_internal::kArrayViewVarSize> {
|
||||
public:
|
||||
ArrayViewBase(T* data, size_t size)
|
||||
: data_(size == 0 ? nullptr : data), size_(size) {}
|
||||
|
||||
size_t size() const { return size_; }
|
||||
bool empty() const { return size_ == 0; }
|
||||
T* data() const { return data_; }
|
||||
|
||||
protected:
|
||||
static constexpr bool fixed_size() { return false; }
|
||||
|
||||
private:
|
||||
T* data_;
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
} // namespace array_view_internal
|
||||
|
||||
template <typename T,
|
||||
std::ptrdiff_t Size = array_view_internal::kArrayViewVarSize>
|
||||
class ArrayView final : public array_view_internal::ArrayViewBase<T, Size> {
|
||||
public:
|
||||
using value_type = T;
|
||||
using reference = value_type&;
|
||||
using const_reference = const value_type&;
|
||||
using pointer = value_type*;
|
||||
using const_pointer = const value_type*;
|
||||
using const_iterator = const T*;
|
||||
|
||||
// Construct an ArrayView from a pointer and a length.
|
||||
template <typename U>
|
||||
ArrayView(U* data, size_t size)
|
||||
: array_view_internal::ArrayViewBase<T, Size>::ArrayViewBase(data, size) {
|
||||
RTC_DCHECK_EQ(size == 0 ? nullptr : data, this->data());
|
||||
RTC_DCHECK_EQ(size, this->size());
|
||||
RTC_DCHECK_EQ(!this->data(),
|
||||
this->size() == 0); // data is null iff size == 0.
|
||||
}
|
||||
|
||||
// Construct an empty ArrayView. Note that fixed-size ArrayViews of size > 0
|
||||
// cannot be empty.
|
||||
ArrayView() : ArrayView(nullptr, 0) {}
|
||||
ArrayView(std::nullptr_t) // NOLINT
|
||||
: ArrayView() {}
|
||||
ArrayView(std::nullptr_t, size_t size)
|
||||
: ArrayView(static_cast<T*>(nullptr), size) {
|
||||
static_assert(Size == 0 || Size == array_view_internal::kArrayViewVarSize,
|
||||
"");
|
||||
RTC_DCHECK_EQ(0, size);
|
||||
}
|
||||
|
||||
// Construct an ArrayView from a C-style array.
|
||||
template <typename U, size_t N>
|
||||
ArrayView(U (&array)[N]) // NOLINT
|
||||
: ArrayView(array, N) {
|
||||
static_assert(Size == N || Size == array_view_internal::kArrayViewVarSize,
|
||||
"Array size must match ArrayView size");
|
||||
}
|
||||
|
||||
// (Only if size is fixed.) Construct a fixed size ArrayView<T, N> from a
|
||||
// non-const std::array instance. For an ArrayView with variable size, the
|
||||
// used ctor is ArrayView(U& u) instead.
|
||||
template <typename U,
|
||||
size_t N,
|
||||
typename std::enable_if<
|
||||
Size == static_cast<std::ptrdiff_t>(N)>::type* = nullptr>
|
||||
ArrayView(std::array<U, N>& u) // NOLINT
|
||||
: ArrayView(u.data(), u.size()) {}
|
||||
|
||||
// (Only if size is fixed.) Construct a fixed size ArrayView<T, N> where T is
|
||||
// const from a const(expr) std::array instance. For an ArrayView with
|
||||
// variable size, the used ctor is ArrayView(U& u) instead.
|
||||
template <typename U,
|
||||
size_t N,
|
||||
typename std::enable_if<
|
||||
Size == static_cast<std::ptrdiff_t>(N)>::type* = nullptr>
|
||||
ArrayView(const std::array<U, N>& u) // NOLINT
|
||||
: ArrayView(u.data(), u.size()) {}
|
||||
|
||||
// (Only if size is fixed.) Construct an ArrayView from any type U that has a
|
||||
// static constexpr size() method whose return value is equal to Size, and a
|
||||
// data() method whose return value converts implicitly to T*. In particular,
|
||||
// this means we allow conversion from ArrayView<T, N> to ArrayView<const T,
|
||||
// N>, but not the other way around. We also don't allow conversion from
|
||||
// ArrayView<T> to ArrayView<T, N>, or from ArrayView<T, M> to ArrayView<T,
|
||||
// N> when M != N.
|
||||
template <
|
||||
typename U,
|
||||
typename std::enable_if<Size != array_view_internal::kArrayViewVarSize &&
|
||||
HasDataAndSize<U, T>::value>::type* = nullptr>
|
||||
ArrayView(U& u) // NOLINT
|
||||
: ArrayView(u.data(), u.size()) {
|
||||
static_assert(U::size() == Size, "Sizes must match exactly");
|
||||
}
|
||||
template <
|
||||
typename U,
|
||||
typename std::enable_if<Size != array_view_internal::kArrayViewVarSize &&
|
||||
HasDataAndSize<U, T>::value>::type* = nullptr>
|
||||
ArrayView(const U& u) // NOLINT(runtime/explicit)
|
||||
: ArrayView(u.data(), u.size()) {
|
||||
static_assert(U::size() == Size, "Sizes must match exactly");
|
||||
}
|
||||
|
||||
// (Only if size is variable.) Construct an ArrayView from any type U that
|
||||
// has a size() method whose return value converts implicitly to size_t, and
|
||||
// a data() method whose return value converts implicitly to T*. In
|
||||
// particular, this means we allow conversion from ArrayView<T> to
|
||||
// ArrayView<const T>, but not the other way around. Other allowed
|
||||
// conversions include
|
||||
// ArrayView<T, N> to ArrayView<T> or ArrayView<const T>,
|
||||
// std::vector<T> to ArrayView<T> or ArrayView<const T>,
|
||||
// const std::vector<T> to ArrayView<const T>,
|
||||
// rtc::Buffer to ArrayView<uint8_t> or ArrayView<const uint8_t>, and
|
||||
// const rtc::Buffer to ArrayView<const uint8_t>.
|
||||
template <
|
||||
typename U,
|
||||
typename std::enable_if<Size == array_view_internal::kArrayViewVarSize &&
|
||||
HasDataAndSize<U, T>::value>::type* = nullptr>
|
||||
ArrayView(U& u) // NOLINT
|
||||
: ArrayView(u.data(), u.size()) {}
|
||||
template <
|
||||
typename U,
|
||||
typename std::enable_if<Size == array_view_internal::kArrayViewVarSize &&
|
||||
HasDataAndSize<U, T>::value>::type* = nullptr>
|
||||
ArrayView(const U& u) // NOLINT(runtime/explicit)
|
||||
: ArrayView(u.data(), u.size()) {}
|
||||
|
||||
// Indexing and iteration. These allow mutation even if the ArrayView is
|
||||
// const, because the ArrayView doesn't own the array. (To prevent mutation,
|
||||
// use a const element type.)
|
||||
T& operator[](size_t idx) const {
|
||||
RTC_DCHECK_LT(idx, this->size());
|
||||
RTC_DCHECK(this->data());
|
||||
return this->data()[idx];
|
||||
}
|
||||
T* begin() const { return this->data(); }
|
||||
T* end() const { return this->data() + this->size(); }
|
||||
const T* cbegin() const { return this->data(); }
|
||||
const T* cend() const { return this->data() + this->size(); }
|
||||
std::reverse_iterator<T*> rbegin() const {
|
||||
return std::make_reverse_iterator(end());
|
||||
}
|
||||
std::reverse_iterator<T*> rend() const {
|
||||
return std::make_reverse_iterator(begin());
|
||||
}
|
||||
std::reverse_iterator<const T*> crbegin() const {
|
||||
return std::make_reverse_iterator(cend());
|
||||
}
|
||||
std::reverse_iterator<const T*> crend() const {
|
||||
return std::make_reverse_iterator(cbegin());
|
||||
}
|
||||
|
||||
ArrayView<T> subview(size_t offset, size_t size) const {
|
||||
return offset < this->size()
|
||||
? ArrayView<T>(this->data() + offset,
|
||||
std::min(size, this->size() - offset))
|
||||
: ArrayView<T>();
|
||||
}
|
||||
ArrayView<T> subview(size_t offset) const {
|
||||
return subview(offset, this->size());
|
||||
}
|
||||
};
|
||||
|
||||
// Comparing two ArrayViews compares their (pointer,size) pairs; it does *not*
|
||||
// dereference the pointers.
|
||||
template <typename T, std::ptrdiff_t Size1, std::ptrdiff_t Size2>
|
||||
bool operator==(const ArrayView<T, Size1>& a, const ArrayView<T, Size2>& b) {
|
||||
return a.data() == b.data() && a.size() == b.size();
|
||||
}
|
||||
template <typename T, std::ptrdiff_t Size1, std::ptrdiff_t Size2>
|
||||
bool operator!=(const ArrayView<T, Size1>& a, const ArrayView<T, Size2>& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
// Variable-size ArrayViews are the size of two pointers; fixed-size ArrayViews
|
||||
// are the size of one pointer. (And as a special case, fixed-size ArrayViews
|
||||
// of size 0 require no storage.)
|
||||
static_assert(sizeof(ArrayView<int>) == 2 * sizeof(int*), "");
|
||||
static_assert(sizeof(ArrayView<int, 17>) == sizeof(int*), "");
|
||||
static_assert(std::is_empty<ArrayView<int, 0>>::value, "");
|
||||
|
||||
template <typename T>
|
||||
inline ArrayView<T> MakeArrayView(T* data, size_t size) {
|
||||
return ArrayView<T>(data, size);
|
||||
}
|
||||
|
||||
// Only for primitive types that have the same size and aligment.
|
||||
// Allow reinterpret cast of the array view to another primitive type of the
|
||||
// same size.
|
||||
// Template arguments order is (U, T, Size) to allow deduction of the template
|
||||
// arguments in client calls: reinterpret_array_view<target_type>(array_view).
|
||||
template <typename U, typename T, std::ptrdiff_t Size>
|
||||
inline ArrayView<U, Size> reinterpret_array_view(ArrayView<T, Size> view) {
|
||||
static_assert(sizeof(U) == sizeof(T) && alignof(U) == alignof(T),
|
||||
"ArrayView reinterpret_cast is only supported for casting "
|
||||
"between views that represent the same chunk of memory.");
|
||||
static_assert(
|
||||
std::is_fundamental<T>::value && std::is_fundamental<U>::value,
|
||||
"ArrayView reinterpret_cast is only supported for casting between "
|
||||
"fundamental types.");
|
||||
return ArrayView<U, Size>(reinterpret_cast<U*>(view.data()), view.size());
|
||||
}
|
||||
|
||||
} // namespace rtc
|
||||
|
||||
#endif // API_ARRAY_VIEW_H_
|
211
VocieProcess/api/audio/audio_processing.cc
Normal file
211
VocieProcess/api/audio/audio_processing.cc
Normal file
@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio/audio_processing.h"
|
||||
#include <string>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/strings/string_builder.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
using Agc1Config = AudioProcessing::Config::GainController1;
|
||||
using Agc2Config = AudioProcessing::Config::GainController2;
|
||||
|
||||
std::string NoiseSuppressionLevelToString(
|
||||
const AudioProcessing::Config::NoiseSuppression::Level& level) {
|
||||
switch (level) {
|
||||
case AudioProcessing::Config::NoiseSuppression::Level::kLow:
|
||||
return "Low";
|
||||
case AudioProcessing::Config::NoiseSuppression::Level::kModerate:
|
||||
return "Moderate";
|
||||
case AudioProcessing::Config::NoiseSuppression::Level::kHigh:
|
||||
return "High";
|
||||
case AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh:
|
||||
return "VeryHigh";
|
||||
}
|
||||
RTC_CHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
|
||||
switch (mode) {
|
||||
case Agc1Config::Mode::kAdaptiveAnalog:
|
||||
return "AdaptiveAnalog";
|
||||
case Agc1Config::Mode::kAdaptiveDigital:
|
||||
return "AdaptiveDigital";
|
||||
case Agc1Config::Mode::kFixedDigital:
|
||||
return "FixedDigital";
|
||||
}
|
||||
RTC_CHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
constexpr int AudioProcessing::kNativeSampleRatesHz[];
|
||||
|
||||
void CustomProcessing::SetRuntimeSetting(
|
||||
AudioProcessing::RuntimeSetting setting) {}
|
||||
|
||||
bool Agc1Config::operator==(const Agc1Config& rhs) const {
|
||||
const auto& analog_lhs = analog_gain_controller;
|
||||
const auto& analog_rhs = rhs.analog_gain_controller;
|
||||
return enabled == rhs.enabled && mode == rhs.mode &&
|
||||
target_level_dbfs == rhs.target_level_dbfs &&
|
||||
compression_gain_db == rhs.compression_gain_db &&
|
||||
enable_limiter == rhs.enable_limiter &&
|
||||
analog_lhs.enabled == analog_rhs.enabled &&
|
||||
analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
|
||||
analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
|
||||
analog_lhs.enable_digital_adaptive ==
|
||||
analog_rhs.enable_digital_adaptive &&
|
||||
analog_lhs.clipped_level_step == analog_rhs.clipped_level_step &&
|
||||
analog_lhs.clipped_ratio_threshold ==
|
||||
analog_rhs.clipped_ratio_threshold &&
|
||||
analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames &&
|
||||
analog_lhs.clipping_predictor.mode ==
|
||||
analog_rhs.clipping_predictor.mode &&
|
||||
analog_lhs.clipping_predictor.window_length ==
|
||||
analog_rhs.clipping_predictor.window_length &&
|
||||
analog_lhs.clipping_predictor.reference_window_length ==
|
||||
analog_rhs.clipping_predictor.reference_window_length &&
|
||||
analog_lhs.clipping_predictor.reference_window_delay ==
|
||||
analog_rhs.clipping_predictor.reference_window_delay &&
|
||||
analog_lhs.clipping_predictor.clipping_threshold ==
|
||||
analog_rhs.clipping_predictor.clipping_threshold &&
|
||||
analog_lhs.clipping_predictor.crest_factor_margin ==
|
||||
analog_rhs.clipping_predictor.crest_factor_margin &&
|
||||
analog_lhs.clipping_predictor.use_predicted_step ==
|
||||
analog_rhs.clipping_predictor.use_predicted_step;
|
||||
}
|
||||
|
||||
bool Agc2Config::AdaptiveDigital::operator==(
|
||||
const Agc2Config::AdaptiveDigital& rhs) const {
|
||||
return enabled == rhs.enabled && headroom_db == rhs.headroom_db &&
|
||||
max_gain_db == rhs.max_gain_db &&
|
||||
initial_gain_db == rhs.initial_gain_db &&
|
||||
max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
|
||||
max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
|
||||
}
|
||||
|
||||
bool Agc2Config::InputVolumeController::operator==(
|
||||
const Agc2Config::InputVolumeController& rhs) const {
|
||||
return enabled == rhs.enabled;
|
||||
}
|
||||
|
||||
bool Agc2Config::operator==(const Agc2Config& rhs) const {
|
||||
return enabled == rhs.enabled &&
|
||||
fixed_digital.gain_db == rhs.fixed_digital.gain_db &&
|
||||
adaptive_digital == rhs.adaptive_digital &&
|
||||
input_volume_controller == rhs.input_volume_controller;
|
||||
}
|
||||
|
||||
bool AudioProcessing::Config::CaptureLevelAdjustment::operator==(
|
||||
const AudioProcessing::Config::CaptureLevelAdjustment& rhs) const {
|
||||
return enabled == rhs.enabled && pre_gain_factor == rhs.pre_gain_factor &&
|
||||
post_gain_factor == rhs.post_gain_factor &&
|
||||
analog_mic_gain_emulation == rhs.analog_mic_gain_emulation;
|
||||
}
|
||||
|
||||
bool AudioProcessing::Config::CaptureLevelAdjustment::AnalogMicGainEmulation::
|
||||
operator==(const AudioProcessing::Config::CaptureLevelAdjustment::
|
||||
AnalogMicGainEmulation& rhs) const {
|
||||
return enabled == rhs.enabled && initial_level == rhs.initial_level;
|
||||
}
|
||||
|
||||
std::string AudioProcessing::Config::ToString() const {
|
||||
char buf[2048];
|
||||
rtc::SimpleStringBuilder builder(buf);
|
||||
builder << "AudioProcessing::Config{ "
|
||||
"pipeline: { "
|
||||
"maximum_internal_processing_rate: "
|
||||
<< pipeline.maximum_internal_processing_rate
|
||||
<< ", multi_channel_render: " << pipeline.multi_channel_render
|
||||
<< ", multi_channel_capture: " << pipeline.multi_channel_capture
|
||||
<< " }, pre_amplifier: { enabled: " << pre_amplifier.enabled
|
||||
<< ", fixed_gain_factor: " << pre_amplifier.fixed_gain_factor
|
||||
<< " },capture_level_adjustment: { enabled: "
|
||||
<< capture_level_adjustment.enabled
|
||||
<< ", pre_gain_factor: " << capture_level_adjustment.pre_gain_factor
|
||||
<< ", post_gain_factor: " << capture_level_adjustment.post_gain_factor
|
||||
<< ", analog_mic_gain_emulation: { enabled: "
|
||||
<< capture_level_adjustment.analog_mic_gain_emulation.enabled
|
||||
<< ", initial_level: "
|
||||
<< capture_level_adjustment.analog_mic_gain_emulation.initial_level
|
||||
<< " }}, high_pass_filter: { enabled: " << high_pass_filter.enabled
|
||||
<< " }, echo_canceller: { enabled: " << echo_canceller.enabled
|
||||
<< ", mobile_mode: " << echo_canceller.mobile_mode
|
||||
<< ", enforce_high_pass_filtering: "
|
||||
<< echo_canceller.enforce_high_pass_filtering
|
||||
<< " }, noise_suppression: { enabled: " << noise_suppression.enabled
|
||||
<< ", level: "
|
||||
<< NoiseSuppressionLevelToString(noise_suppression.level)
|
||||
<< " }, transient_suppression: { enabled: "
|
||||
<< transient_suppression.enabled
|
||||
<< " }, gain_controller1: { enabled: " << gain_controller1.enabled
|
||||
<< ", mode: " << GainController1ModeToString(gain_controller1.mode)
|
||||
<< ", target_level_dbfs: " << gain_controller1.target_level_dbfs
|
||||
<< ", compression_gain_db: " << gain_controller1.compression_gain_db
|
||||
<< ", enable_limiter: " << gain_controller1.enable_limiter
|
||||
<< ", analog_gain_controller { enabled: "
|
||||
<< gain_controller1.analog_gain_controller.enabled
|
||||
<< ", startup_min_volume: "
|
||||
<< gain_controller1.analog_gain_controller.startup_min_volume
|
||||
<< ", clipped_level_min: "
|
||||
<< gain_controller1.analog_gain_controller.clipped_level_min
|
||||
<< ", enable_digital_adaptive: "
|
||||
<< gain_controller1.analog_gain_controller.enable_digital_adaptive
|
||||
<< ", clipped_level_step: "
|
||||
<< gain_controller1.analog_gain_controller.clipped_level_step
|
||||
<< ", clipped_ratio_threshold: "
|
||||
<< gain_controller1.analog_gain_controller.clipped_ratio_threshold
|
||||
<< ", clipped_wait_frames: "
|
||||
<< gain_controller1.analog_gain_controller.clipped_wait_frames
|
||||
<< ", clipping_predictor: { enabled: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor.enabled
|
||||
<< ", mode: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor.mode
|
||||
<< ", window_length: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor
|
||||
.window_length
|
||||
<< ", reference_window_length: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor
|
||||
.reference_window_length
|
||||
<< ", reference_window_delay: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor
|
||||
.reference_window_delay
|
||||
<< ", clipping_threshold: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor
|
||||
.clipping_threshold
|
||||
<< ", crest_factor_margin: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor
|
||||
.crest_factor_margin
|
||||
<< ", use_predicted_step: "
|
||||
<< gain_controller1.analog_gain_controller.clipping_predictor
|
||||
.use_predicted_step
|
||||
<< " }}}, gain_controller2: { enabled: " << gain_controller2.enabled
|
||||
<< ", fixed_digital: { gain_db: "
|
||||
<< gain_controller2.fixed_digital.gain_db
|
||||
<< " }, adaptive_digital: { enabled: "
|
||||
<< gain_controller2.adaptive_digital.enabled
|
||||
<< ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
|
||||
<< ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
|
||||
<< ", initial_gain_db: "
|
||||
<< gain_controller2.adaptive_digital.initial_gain_db
|
||||
<< ", max_gain_change_db_per_second: "
|
||||
<< gain_controller2.adaptive_digital.max_gain_change_db_per_second
|
||||
<< ", max_output_noise_level_dbfs: "
|
||||
<< gain_controller2.adaptive_digital.max_output_noise_level_dbfs
|
||||
<< " }, input_volume_control : { enabled "
|
||||
<< gain_controller2.input_volume_controller.enabled << "}}";
|
||||
return builder.str();
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
944
VocieProcess/api/audio/audio_processing.h
Normal file
944
VocieProcess/api/audio/audio_processing.h
Normal file
@ -0,0 +1,944 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_AUDIO_AUDIO_PROCESSING_H_
|
||||
#define API_AUDIO_AUDIO_PROCESSING_H_
|
||||
|
||||
// MSVC++ requires this to be set before any other includes to get M_PI.
|
||||
#ifndef _USE_MATH_DEFINES
|
||||
#define _USE_MATH_DEFINES
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h> // size_t
|
||||
#include <stdio.h> // FILE
|
||||
#include <string.h>
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/base/nullability.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/audio_processing_statistics.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "api/ref_count.h"
|
||||
#include "api/scoped_refptr.h"
|
||||
#include "api/task_queue/task_queue_base.h"
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/system/rtc_export.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AecDump;
|
||||
class AudioBuffer;
|
||||
|
||||
class StreamConfig;
|
||||
class ProcessingConfig;
|
||||
|
||||
class EchoDetector;
|
||||
|
||||
// The Audio Processing Module (APM) provides a collection of voice processing
|
||||
// components designed for real-time communications software.
|
||||
//
|
||||
// APM operates on two audio streams on a frame-by-frame basis. Frames of the
|
||||
// primary stream, on which all processing is applied, are passed to
|
||||
// `ProcessStream()`. Frames of the reverse direction stream are passed to
|
||||
// `ProcessReverseStream()`. On the client-side, this will typically be the
|
||||
// near-end (capture) and far-end (render) streams, respectively. APM should be
|
||||
// placed in the signal chain as close to the audio hardware abstraction layer
|
||||
// (HAL) as possible.
|
||||
//
|
||||
// On the server-side, the reverse stream will normally not be used, with
|
||||
// processing occurring on each incoming stream.
|
||||
//
|
||||
// Component interfaces follow a similar pattern and are accessed through
|
||||
// corresponding getters in APM. All components are disabled at create-time,
|
||||
// with default settings that are recommended for most situations. New settings
|
||||
// can be applied without enabling a component. Enabling a component triggers
|
||||
// memory allocation and initialization to allow it to start processing the
|
||||
// streams.
|
||||
//
|
||||
// Thread safety is provided with the following assumptions to reduce locking
|
||||
// overhead:
|
||||
// 1. The stream getters and setters are called from the same thread as
|
||||
// ProcessStream(). More precisely, stream functions are never called
|
||||
// concurrently with ProcessStream().
|
||||
// 2. Parameter getters are never called concurrently with the corresponding
|
||||
// setter.
|
||||
//
|
||||
// APM accepts only linear PCM audio data in chunks of ~10 ms (see
|
||||
// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
|
||||
// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
|
||||
// float interfaces use deinterleaved data.
|
||||
//
|
||||
// Usage example, omitting error checking:
|
||||
// rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
|
||||
//
|
||||
// AudioProcessing::Config config;
|
||||
// config.echo_canceller.enabled = true;
|
||||
// config.echo_canceller.mobile_mode = false;
|
||||
//
|
||||
// config.gain_controller1.enabled = true;
|
||||
// config.gain_controller1.mode =
|
||||
// AudioProcessing::Config::GainController1::kAdaptiveAnalog;
|
||||
// config.gain_controller1.analog_level_minimum = 0;
|
||||
// config.gain_controller1.analog_level_maximum = 255;
|
||||
//
|
||||
// config.gain_controller2.enabled = true;
|
||||
//
|
||||
// config.high_pass_filter.enabled = true;
|
||||
//
|
||||
// apm->ApplyConfig(config)
|
||||
//
|
||||
// // Start a voice call...
|
||||
//
|
||||
// // ... Render frame arrives bound for the audio HAL ...
|
||||
// apm->ProcessReverseStream(render_frame);
|
||||
//
|
||||
// // ... Capture frame arrives from the audio HAL ...
|
||||
// // Call required set_stream_ functions.
|
||||
// apm->set_stream_delay_ms(delay_ms);
|
||||
// apm->set_stream_analog_level(analog_level);
|
||||
//
|
||||
// apm->ProcessStream(capture_frame);
|
||||
//
|
||||
// // Call required stream_ functions.
|
||||
// analog_level = apm->recommended_stream_analog_level();
|
||||
// has_voice = apm->stream_has_voice();
|
||||
//
|
||||
// // Repeat render and capture processing for the duration of the call...
|
||||
// // Start a new call...
|
||||
// apm->Initialize();
|
||||
//
|
||||
// // Close the application...
|
||||
// apm.reset();
|
||||
//
|
||||
class RTC_EXPORT AudioProcessing : public RefCountInterface {
|
||||
public:
|
||||
// The struct below constitutes the new parameter scheme for the audio
|
||||
// processing. It is being introduced gradually and until it is fully
|
||||
// introduced, it is prone to change.
|
||||
// TODO(peah): Remove this comment once the new config scheme is fully rolled
|
||||
// out.
|
||||
//
|
||||
// The parameters and behavior of the audio processing module are controlled
|
||||
// by changing the default values in the AudioProcessing::Config struct.
|
||||
// The config is applied by passing the struct to the ApplyConfig method.
|
||||
//
|
||||
// This config is intended to be used during setup, and to enable/disable
|
||||
// top-level processing effects. Use during processing may cause undesired
|
||||
// submodule resets, affecting the audio quality. Use the RuntimeSetting
|
||||
// construct for runtime configuration.
|
||||
struct RTC_EXPORT Config {
  // Sets the properties of the audio processing pipeline.
  struct RTC_EXPORT Pipeline {
    // Ways to downmix a multi-channel track to mono.
    enum class DownmixMethod {
      kAverageChannels,  // Average across channels.
      kUseFirstChannel   // Use the first channel.
    };

    // Maximum allowed processing rate used internally. May only be set to
    // 32000 or 48000 and any differing values will be treated as 48000.
    int maximum_internal_processing_rate = 48000;
    // Allow multi-channel processing of render audio.
    bool multi_channel_render = false;
    // Allow multi-channel processing of capture audio when AEC3 is active
    // or a custom AEC is injected.
    bool multi_channel_capture = false;
    // Indicates how to downmix multi-channel capture audio to mono (when
    // needed).
    DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
  } pipeline;

  // Enables the pre-amplifier. It amplifies the capture signal
  // before any other processing is done.
  // TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
  // capture_level_adjustment instead.
  struct PreAmplifier {
    bool enabled = false;
    float fixed_gain_factor = 1.0f;  // Gain applied to the capture signal.
  } pre_amplifier;

  // Functionality for general level adjustment in the capture pipeline. This
  // should not be used together with the legacy PreAmplifier functionality.
  struct CaptureLevelAdjustment {
    bool operator==(const CaptureLevelAdjustment& rhs) const;
    bool operator!=(const CaptureLevelAdjustment& rhs) const {
      return !(*this == rhs);
    }
    bool enabled = false;
    // The `pre_gain_factor` scales the signal before any processing is done.
    float pre_gain_factor = 1.0f;
    // The `post_gain_factor` scales the signal after all processing is done.
    float post_gain_factor = 1.0f;
    // Emulation of an analog microphone gain stage.
    struct AnalogMicGainEmulation {
      bool operator==(const AnalogMicGainEmulation& rhs) const;
      bool operator!=(const AnalogMicGainEmulation& rhs) const {
        return !(*this == rhs);
      }
      bool enabled = false;
      // Initial analog gain level to use for the emulated analog gain. Must
      // be in the range [0...255].
      int initial_level = 255;
    } analog_mic_gain_emulation;
  } capture_level_adjustment;

  // High-pass filtering of the capture signal.
  struct HighPassFilter {
    bool enabled = false;
    // NOTE(review): presumably selects applying the filter on the full-band
    // signal rather than in split bands — confirm against the implementation.
    bool apply_in_full_band = true;
  } high_pass_filter;

  // Acoustic echo cancellation settings.
  struct EchoCanceller {
    bool enabled = false;
    bool mobile_mode = false;
    // If true, the linear AEC output is exported; see GetLinearAecOutput().
    bool export_linear_aec_output = false;
    // Enforce the highpass filter to be on (has no effect for the mobile
    // mode).
    bool enforce_high_pass_filtering = true;
  } echo_canceller;

  // Enables background noise suppression.
  struct NoiseSuppression {
    bool enabled = false;
    // Suppression aggressiveness, from least to most aggressive.
    enum Level { kLow, kModerate, kHigh, kVeryHigh };
    Level level = kModerate;
    bool analyze_linear_aec_output_when_available = false;
  } noise_suppression;

  // TODO(bugs.webrtc.org/357281131): Deprecated. Stop using and remove.
  // Enables transient suppression.
  struct TransientSuppression {
    bool enabled = false;
  } transient_suppression;

  // Enables automatic gain control (AGC) functionality.
  // The automatic gain control (AGC) component brings the signal to an
  // appropriate range. This is done by applying a digital gain directly and,
  // in the analog mode, prescribing an analog gain to be applied at the audio
  // HAL.
  // Recommended to be enabled on the client-side.
  struct RTC_EXPORT GainController1 {
    bool operator==(const GainController1& rhs) const;
    bool operator!=(const GainController1& rhs) const {
      return !(*this == rhs);
    }

    bool enabled = false;
    enum Mode {
      // Adaptive mode intended for use if an analog volume control is
      // available on the capture device. It will require the user to provide
      // coupling between the OS mixer controls and AGC through the
      // stream_analog_level() functions.
      // It consists of an analog gain prescription for the audio device and a
      // digital compression stage.
      kAdaptiveAnalog,
      // Adaptive mode intended for situations in which an analog volume
      // control is unavailable. It operates in a similar fashion to the
      // adaptive analog mode, but with scaling instead applied in the digital
      // domain. As with the analog mode, it additionally uses a digital
      // compression stage.
      kAdaptiveDigital,
      // Fixed mode which enables only the digital compression stage also used
      // by the two adaptive modes.
      // It is distinguished from the adaptive modes by considering only a
      // short time-window of the input signal. It applies a fixed gain
      // through most of the input level range, and compresses (gradually
      // reduces gain with increasing level) the input signal at higher
      // levels. This mode is preferred on embedded devices where the capture
      // signal level is predictable, so that a known gain can be applied.
      kFixedDigital
    };
    Mode mode = kAdaptiveAnalog;
    // Sets the target peak level (or envelope) of the AGC in dBFs (decibels
    // from digital full-scale). The convention is to use positive values. For
    // instance, passing in a value of 3 corresponds to -3 dBFs, or a target
    // level 3 dB below full-scale. Limited to [0, 31].
    int target_level_dbfs = 3;
    // Sets the maximum gain the digital compression stage may apply, in dB. A
    // higher number corresponds to greater compression, while a value of 0
    // will leave the signal uncompressed. Limited to [0, 90].
    // For updates after APM setup, use a RuntimeSetting instead.
    int compression_gain_db = 9;
    // When enabled, the compression stage will hard limit the signal to the
    // target level. Otherwise, the signal will be compressed but not limited
    // above the target level.
    bool enable_limiter = true;

    // Enables the analog gain controller functionality.
    struct AnalogGainController {
      bool enabled = true;
      // TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
      int startup_min_volume = 0;
      // Lowest analog microphone level that will be applied in response to
      // clipping.
      int clipped_level_min = 70;
      // If true, an adaptive digital gain is applied.
      bool enable_digital_adaptive = true;
      // Amount the microphone level is lowered with every clipping event.
      // Limited to (0, 255].
      int clipped_level_step = 15;
      // Proportion of clipped samples required to declare a clipping event.
      // Limited to (0.f, 1.f).
      float clipped_ratio_threshold = 0.1f;
      // Time in frames to wait after a clipping event before checking again.
      // Limited to values higher than 0.
      int clipped_wait_frames = 300;

      // Enables clipping prediction functionality.
      struct ClippingPredictor {
        bool enabled = false;
        enum Mode {
          // Clipping event prediction mode with fixed step estimation.
          kClippingEventPrediction,
          // Clipped peak estimation mode with adaptive step estimation.
          kAdaptiveStepClippingPeakPrediction,
          // Clipped peak estimation mode with fixed step estimation.
          kFixedStepClippingPeakPrediction,
        };
        Mode mode = kClippingEventPrediction;
        // Number of frames in the sliding analysis window.
        int window_length = 5;
        // Number of frames in the sliding reference window.
        int reference_window_length = 5;
        // Reference window delay (unit: number of frames).
        int reference_window_delay = 5;
        // Clipping prediction threshold (dBFS).
        float clipping_threshold = -1.0f;
        // Crest factor drop threshold (dB).
        float crest_factor_margin = 3.0f;
        // If true, the recommended clipped level step is used to modify the
        // analog gain. Otherwise, the predictor runs without affecting the
        // analog gain.
        bool use_predicted_step = true;
      } clipping_predictor;
    } analog_gain_controller;
  } gain_controller1;

  // Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
  // replaces the AGC sub-module parametrized by `gain_controller1`.
  // AGC2 brings the captured audio signal to the desired level by combining
  // three different controllers (namely, input volume controller, adaptive
  // digital controller and fixed digital controller) and a limiter.
  // TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
  struct RTC_EXPORT GainController2 {
    bool operator==(const GainController2& rhs) const;
    bool operator!=(const GainController2& rhs) const {
      return !(*this == rhs);
    }

    // AGC2 must be created if and only if `enabled` is true.
    bool enabled = false;

    // Parameters for the input volume controller, which adjusts the input
    // volume applied when the audio is captured (e.g., microphone volume on
    // a soundcard, input volume on HAL).
    struct InputVolumeController {
      bool operator==(const InputVolumeController& rhs) const;
      bool operator!=(const InputVolumeController& rhs) const {
        return !(*this == rhs);
      }
      bool enabled = false;
    } input_volume_controller;

    // Parameters for the adaptive digital controller, which adjusts and
    // applies a digital gain after echo cancellation and after noise
    // suppression.
    struct RTC_EXPORT AdaptiveDigital {
      bool operator==(const AdaptiveDigital& rhs) const;
      bool operator!=(const AdaptiveDigital& rhs) const {
        return !(*this == rhs);
      }
      bool enabled = false;
      float headroom_db = 5.0f;
      float max_gain_db = 50.0f;
      float initial_gain_db = 15.0f;
      float max_gain_change_db_per_second = 6.0f;
      float max_output_noise_level_dbfs = -50.0f;
    } adaptive_digital;

    // Parameters for the fixed digital controller, which applies a fixed
    // digital gain after the adaptive digital controller and before the
    // limiter.
    struct FixedDigital {
      // By setting `gain_db` to a value greater than zero, the limiter can be
      // turned into a compressor that first applies a fixed gain.
      float gain_db = 0.0f;
    } fixed_digital;
  } gain_controller2;

  // Renders this config as a human-readable string (logging/debugging aid).
  std::string ToString() const;
};
|
||||
|
||||
// Specifies the properties of a setting to be passed to AudioProcessing at
|
||||
// runtime.
|
||||
class RuntimeSetting {
 public:
  // Discriminates the union payload below; one enumerator per setting kind.
  enum class Type {
    kNotSpecified,
    kCapturePreGain,
    kCaptureCompressionGain,
    kCaptureFixedPostGain,
    kPlayoutVolumeChange,
    kCustomRenderProcessingRuntimeSetting,
    kPlayoutAudioDeviceChange,
    kCapturePostGain,
    kCaptureOutputUsed
  };

  // Play-out audio device properties.
  struct PlayoutAudioDeviceInfo {
    int id;          // Identifies the audio device.
    int max_volume;  // Maximum play-out volume.
  };

  // A default-constructed setting carries no payload (kNotSpecified).
  RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
  ~RuntimeSetting() = default;

  static RuntimeSetting CreateCapturePreGain(float gain) {
    return {Type::kCapturePreGain, gain};
  }

  static RuntimeSetting CreateCapturePostGain(float gain) {
    return {Type::kCapturePostGain, gain};
  }

  // Corresponds to Config::GainController1::compression_gain_db, but for
  // runtime configuration. `gain_db` must be in [0, 90] (DCHECKed).
  static RuntimeSetting CreateCompressionGainDb(int gain_db) {
    RTC_DCHECK_GE(gain_db, 0);
    RTC_DCHECK_LE(gain_db, 90);
    return {Type::kCaptureCompressionGain, static_cast<float>(gain_db)};
  }

  // Corresponds to Config::GainController2::fixed_digital::gain_db, but for
  // runtime configuration. `gain_db` must be in [0, 90] (DCHECKed).
  static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
    RTC_DCHECK_GE(gain_db, 0.0f);
    RTC_DCHECK_LE(gain_db, 90.0f);
    return {Type::kCaptureFixedPostGain, gain_db};
  }

  // Creates a runtime setting to notify play-out (aka render) audio device
  // changes.
  static RuntimeSetting CreatePlayoutAudioDeviceChange(
      PlayoutAudioDeviceInfo audio_device) {
    return {Type::kPlayoutAudioDeviceChange, audio_device};
  }

  // Creates a runtime setting to notify play-out (aka render) volume changes.
  // `volume` is the unnormalized volume, the maximum of which is presumably
  // device-dependent (see PlayoutAudioDeviceInfo::max_volume) — the original
  // comment was cut short here; confirm against callers.
  static RuntimeSetting CreatePlayoutVolumeChange(int volume) {
    return {Type::kPlayoutVolumeChange, volume};
  }

  static RuntimeSetting CreateCustomRenderSetting(float payload) {
    return {Type::kCustomRenderProcessingRuntimeSetting, payload};
  }

  // NOTE(review): the bool payload is stored through the int/float union
  // constructors (the union has no bool constructor), while GetBool() reads
  // `bool_value` — this relies on union member layout; confirm intent.
  static RuntimeSetting CreateCaptureOutputUsedSetting(
      bool capture_output_used) {
    return {Type::kCaptureOutputUsed, capture_output_used};
  }

  Type type() const { return type_; }
  // Getters do not return a value but instead modify the argument to protect
  // from implicit casting. The caller must use the getter matching type_.
  void GetFloat(float* value) const {
    RTC_DCHECK(value);
    *value = value_.float_value;
  }
  void GetInt(int* value) const {
    RTC_DCHECK(value);
    *value = value_.int_value;
  }
  void GetBool(bool* value) const {
    RTC_DCHECK(value);
    *value = value_.bool_value;
  }
  void GetPlayoutAudioDeviceInfo(PlayoutAudioDeviceInfo* value) const {
    RTC_DCHECK(value);
    *value = value_.playout_audio_device_info;
  }

 private:
  // Private tagged constructors; the public Create* factories pick the tag.
  RuntimeSetting(Type id, float value) : type_(id), value_(value) {}
  RuntimeSetting(Type id, int value) : type_(id), value_(value) {}
  RuntimeSetting(Type id, PlayoutAudioDeviceInfo value)
      : type_(id), value_(value) {}
  Type type_;
  // Payload discriminated by type_; not otherwise self-describing.
  union U {
    U() {}
    U(int value) : int_value(value) {}
    U(float value) : float_value(value) {}
    U(PlayoutAudioDeviceInfo value) : playout_audio_device_info(value) {}
    float float_value;
    int int_value;
    bool bool_value;
    PlayoutAudioDeviceInfo playout_audio_device_info;
  } value_;
};
|
||||
|
||||
~AudioProcessing() override {}
|
||||
|
||||
// Initializes internal states, while retaining all user settings. This
|
||||
// should be called before beginning to process a new audio stream. However,
|
||||
// it is not necessary to call before processing the first stream after
|
||||
// creation.
|
||||
//
|
||||
// It is also not necessary to call if the audio parameters (sample
|
||||
// rate and number of channels) have changed. Passing updated parameters
|
||||
// directly to `ProcessStream()` and `ProcessReverseStream()` is permissible.
|
||||
// If the parameters are known at init-time though, they may be provided.
|
||||
// TODO(webrtc:5298): Change to return void.
|
||||
virtual int Initialize() = 0;
|
||||
|
||||
// The int16 interfaces require:
|
||||
// - only `NativeRate`s be used
|
||||
// - that the input, output and reverse rates must match
|
||||
// - that `processing_config.output_stream()` matches
|
||||
// `processing_config.input_stream()`.
|
||||
//
|
||||
// The float interfaces accept arbitrary rates and support differing input and
|
||||
// output layouts, but the output must have either one channel or the same
|
||||
// number of channels as the input.
|
||||
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
|
||||
|
||||
// TODO(peah): This method is a temporary solution used to take control
|
||||
// over the parameters in the audio processing module and is likely to change.
|
||||
virtual void ApplyConfig(const Config& config) = 0;
|
||||
|
||||
// TODO(ajm): Only intended for internal use. Make private and friend the
|
||||
// necessary classes?
|
||||
virtual int proc_sample_rate_hz() const = 0;
|
||||
virtual int proc_split_sample_rate_hz() const = 0;
|
||||
virtual size_t num_input_channels() const = 0;
|
||||
virtual size_t num_proc_channels() const = 0;
|
||||
virtual size_t num_output_channels() const = 0;
|
||||
virtual size_t num_reverse_channels() const = 0;
|
||||
|
||||
// Set to true when the output of AudioProcessing will be muted or in some
|
||||
// other way not used. Ideally, the captured audio would still be processed,
|
||||
// but some components may change behavior based on this information.
|
||||
// Default false. This method takes a lock. To achieve this in a lock-less
|
||||
// manner the PostRuntimeSetting can instead be used.
|
||||
virtual void set_output_will_be_muted(bool muted) = 0;
|
||||
|
||||
// Enqueues a runtime setting.
|
||||
virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;
|
||||
|
||||
// Enqueues a runtime setting. Returns a bool indicating whether the
|
||||
// enqueueing was successful.
|
||||
virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
|
||||
|
||||
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as
|
||||
// specified in `input_config` and `output_config`. `src` and `dest` may use
|
||||
// the same memory, if desired.
|
||||
virtual int ProcessStream(const int16_t* const src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
int16_t* const dest) = 0;
|
||||
|
||||
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
|
||||
// `src` points to a channel buffer, arranged according to `input_stream`. At
|
||||
// output, the channels will be arranged according to `output_stream` in
|
||||
// `dest`.
|
||||
//
|
||||
// The output must have one channel or as many channels as the input. `src`
|
||||
// and `dest` may use the same memory, if desired.
|
||||
virtual int ProcessStream(const float* const* src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
float* const* dest) = 0;
|
||||
|
||||
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for
|
||||
// the reverse direction audio stream as specified in `input_config` and
|
||||
// `output_config`. `src` and `dest` may use the same memory, if desired.
|
||||
virtual int ProcessReverseStream(const int16_t* const src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
int16_t* const dest) = 0;
|
||||
|
||||
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
|
||||
// `data` points to a channel buffer, arranged according to `reverse_config`.
|
||||
virtual int ProcessReverseStream(const float* const* src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
float* const* dest) = 0;
|
||||
|
||||
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
|
||||
// of `data` points to a channel buffer, arranged according to
|
||||
// `reverse_config`.
|
||||
virtual int AnalyzeReverseStream(const float* const* data,
|
||||
const StreamConfig& reverse_config) = 0;
|
||||
|
||||
// Returns the most recently produced ~10 ms of the linear AEC output at a
|
||||
// rate of 16 kHz. If there is more than one capture channel, a mono
|
||||
// representation of the input is returned. Returns true/false to indicate
|
||||
// whether an output returned.
|
||||
virtual bool GetLinearAecOutput(
|
||||
rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;
|
||||
|
||||
// This must be called prior to ProcessStream() if and only if adaptive analog
|
||||
// gain control is enabled, to pass the current analog level from the audio
|
||||
// HAL. Must be within the range [0, 255].
|
||||
virtual void set_stream_analog_level(int level) = 0;
|
||||
|
||||
// When an analog mode is set, this should be called after
|
||||
// `set_stream_analog_level()` and `ProcessStream()` to obtain the recommended
|
||||
// new analog level for the audio HAL. It is the user's responsibility to
|
||||
// apply this level.
|
||||
virtual int recommended_stream_analog_level() const = 0;
|
||||
|
||||
// This must be called if and only if echo processing is enabled.
|
||||
//
|
||||
// Sets the `delay` in ms between ProcessReverseStream() receiving a far-end
|
||||
// frame and ProcessStream() receiving a near-end frame containing the
|
||||
// corresponding echo. On the client-side this can be expressed as
|
||||
// delay = (t_render - t_analyze) + (t_process - t_capture)
|
||||
// where,
|
||||
// - t_analyze is the time a frame is passed to ProcessReverseStream() and
|
||||
// t_render is the time the first sample of the same frame is rendered by
|
||||
// the audio hardware.
|
||||
// - t_capture is the time the first sample of a frame is captured by the
|
||||
// audio hardware and t_process is the time the same frame is passed to
|
||||
// ProcessStream().
|
||||
virtual int set_stream_delay_ms(int delay) = 0;
|
||||
virtual int stream_delay_ms() const = 0;
|
||||
|
||||
// Call to signal that a key press occurred (true) or did not occur (false)
|
||||
// with this chunk of audio.
|
||||
virtual void set_stream_key_pressed(bool key_pressed) = 0;
|
||||
|
||||
// Creates and attaches an webrtc::AecDump for recording debugging
|
||||
// information.
|
||||
// The `worker_queue` may not be null and must outlive the created
|
||||
// AecDump instance. |max_log_size_bytes == -1| means the log size
|
||||
// will be unlimited. `handle` may not be null. The AecDump takes
|
||||
// responsibility for `handle` and closes it in the destructor. A
|
||||
// return value of true indicates that the file has been
|
||||
// sucessfully opened, while a value of false indicates that
|
||||
// opening the file failed.
|
||||
virtual bool CreateAndAttachAecDump(
|
||||
absl::string_view file_name,
|
||||
int64_t max_log_size_bytes,
|
||||
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
|
||||
virtual bool CreateAndAttachAecDump(
|
||||
absl::Nonnull<FILE*> handle,
|
||||
int64_t max_log_size_bytes,
|
||||
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
|
||||
|
||||
// TODO(webrtc:5298) Deprecated variant.
|
||||
// Attaches provided webrtc::AecDump for recording debugging
|
||||
// information. Log file and maximum file size logic is supposed to
|
||||
// be handled by implementing instance of AecDump. Calling this
|
||||
// method when another AecDump is attached resets the active AecDump
|
||||
// with a new one. This causes the d-tor of the earlier AecDump to
|
||||
// be called. The d-tor call may block until all pending logging
|
||||
// tasks are completed.
|
||||
virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0;
|
||||
|
||||
// If no AecDump is attached, this has no effect. If an AecDump is
|
||||
// attached, it's destructor is called. The d-tor may block until
|
||||
// all pending logging tasks are completed.
|
||||
virtual void DetachAecDump() = 0;
|
||||
|
||||
// Get audio processing statistics.
|
||||
virtual AudioProcessingStats GetStatistics() = 0;
|
||||
// TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
|
||||
// should be set if there are active remote tracks (this would usually be true
|
||||
// during a call). If there are no remote tracks some of the stats will not be
|
||||
// set by AudioProcessing, because they only make sense if there is at least
|
||||
// one remote track.
|
||||
virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) = 0;
|
||||
|
||||
// Returns the last applied configuration.
|
||||
virtual AudioProcessing::Config GetConfig() const = 0;
|
||||
|
||||
enum Error {
|
||||
// Fatal errors.
|
||||
kNoError = 0,
|
||||
kUnspecifiedError = -1,
|
||||
kCreationFailedError = -2,
|
||||
kUnsupportedComponentError = -3,
|
||||
kUnsupportedFunctionError = -4,
|
||||
kNullPointerError = -5,
|
||||
kBadParameterError = -6,
|
||||
kBadSampleRateError = -7,
|
||||
kBadDataLengthError = -8,
|
||||
kBadNumberChannelsError = -9,
|
||||
kFileError = -10,
|
||||
kStreamParameterNotSetError = -11,
|
||||
kNotEnabledError = -12,
|
||||
|
||||
// Warnings are non-fatal.
|
||||
// This results when a set_stream_ parameter is out of range. Processing
|
||||
// will continue, but the parameter may have been truncated.
|
||||
kBadStreamParameterWarning = -13
|
||||
};
|
||||
|
||||
// Native rates supported by the integer interfaces.
|
||||
enum NativeRate {
|
||||
kSampleRate8kHz = 8000,
|
||||
kSampleRate16kHz = 16000,
|
||||
kSampleRate32kHz = 32000,
|
||||
kSampleRate48kHz = 48000
|
||||
};
|
||||
|
||||
// TODO(kwiberg): We currently need to support a compiler (Visual C++) that
|
||||
// complains if we don't explicitly state the size of the array here. Remove
|
||||
// the size when that's no longer the case.
|
||||
static constexpr int kNativeSampleRatesHz[4] = {
|
||||
kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz};
|
||||
static constexpr size_t kNumNativeSampleRates =
|
||||
arraysize(kNativeSampleRatesHz);
|
||||
static constexpr int kMaxNativeSampleRateHz =
|
||||
kNativeSampleRatesHz[kNumNativeSampleRates - 1];
|
||||
|
||||
// APM processes audio in chunks of about 10 ms. See GetFrameSize() for
|
||||
// details.
|
||||
static constexpr int kChunkSizeMs = 10;
|
||||
|
||||
// Returns floor(sample_rate_hz/100): the number of samples per channel used
|
||||
// as input and output to the audio processing module in calls to
|
||||
// ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
|
||||
// GetLinearAecOutput.
|
||||
//
|
||||
// This is exactly 10 ms for sample rates divisible by 100. For example:
|
||||
// - 48000 Hz (480 samples per channel),
|
||||
// - 44100 Hz (441 samples per channel),
|
||||
// - 16000 Hz (160 samples per channel).
|
||||
//
|
||||
// Sample rates not divisible by 100 are received/produced in frames of
|
||||
// approximately 10 ms. For example:
|
||||
// - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
|
||||
// - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
|
||||
// These nondivisible sample rates yield lower audio quality compared to
|
||||
// multiples of 100. Internal resampling to 10 ms frames causes a simulated
|
||||
// clock drift effect which impacts the performance of (for example) echo
|
||||
// cancellation.
|
||||
static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; }
|
||||
};
|
||||
|
||||
// Experimental interface for a custom analysis submodule.
|
||||
class CustomAudioAnalyzer {
|
||||
public:
|
||||
// (Re-) Initializes the submodule.
|
||||
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
|
||||
// Analyzes the given capture or render signal.
|
||||
virtual void Analyze(const AudioBuffer* audio) = 0;
|
||||
// Returns a string representation of the module state.
|
||||
virtual std::string ToString() const = 0;
|
||||
|
||||
virtual ~CustomAudioAnalyzer() {}
|
||||
};
|
||||
|
||||
// Interface for a custom processing submodule.
|
||||
class CustomProcessing {
|
||||
public:
|
||||
// (Re-)Initializes the submodule.
|
||||
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
|
||||
// Processes the given capture or render signal.
|
||||
virtual void Process(AudioBuffer* audio) = 0;
|
||||
// Returns a string representation of the module state.
|
||||
virtual std::string ToString() const = 0;
|
||||
// Handles RuntimeSettings. TODO(webrtc:9262): make pure virtual
|
||||
// after updating dependencies.
|
||||
virtual void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting);
|
||||
|
||||
virtual ~CustomProcessing() {}
|
||||
};
|
||||
|
||||
class RTC_EXPORT AudioProcessingBuilder {
|
||||
public:
|
||||
AudioProcessingBuilder();
|
||||
AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
|
||||
AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
|
||||
~AudioProcessingBuilder();
|
||||
|
||||
// Sets the APM configuration.
|
||||
AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
|
||||
config_ = config;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Sets the echo controller factory to inject when APM is created.
|
||||
AudioProcessingBuilder& SetEchoControlFactory(
|
||||
std::unique_ptr<EchoControlFactory> echo_control_factory) {
|
||||
echo_control_factory_ = std::move(echo_control_factory);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Sets the capture post-processing sub-module to inject when APM is created.
|
||||
AudioProcessingBuilder& SetCapturePostProcessing(
|
||||
std::unique_ptr<CustomProcessing> capture_post_processing) {
|
||||
capture_post_processing_ = std::move(capture_post_processing);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Sets the render pre-processing sub-module to inject when APM is created.
|
||||
AudioProcessingBuilder& SetRenderPreProcessing(
|
||||
std::unique_ptr<CustomProcessing> render_pre_processing) {
|
||||
render_pre_processing_ = std::move(render_pre_processing);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Sets the echo detector to inject when APM is created.
|
||||
AudioProcessingBuilder& SetEchoDetector(
|
||||
rtc::scoped_refptr<EchoDetector> echo_detector) {
|
||||
echo_detector_ = std::move(echo_detector);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Sets the capture analyzer sub-module to inject when APM is created.
|
||||
AudioProcessingBuilder& SetCaptureAnalyzer(
|
||||
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
|
||||
capture_analyzer_ = std::move(capture_analyzer);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Creates an APM instance with the specified config or the default one if
|
||||
// unspecified. Injects the specified components transferring the ownership
|
||||
// to the newly created APM instance - i.e., except for the config, the
|
||||
// builder is reset to its initial state.
|
||||
rtc::scoped_refptr<AudioProcessing> Create();
|
||||
|
||||
private:
|
||||
AudioProcessing::Config config_;
|
||||
std::unique_ptr<EchoControlFactory> echo_control_factory_;
|
||||
std::unique_ptr<CustomProcessing> capture_post_processing_;
|
||||
std::unique_ptr<CustomProcessing> render_pre_processing_;
|
||||
rtc::scoped_refptr<EchoDetector> echo_detector_;
|
||||
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
|
||||
};
|
||||
|
||||
class StreamConfig {
|
||||
public:
|
||||
// sample_rate_hz: The sampling rate of the stream.
|
||||
// num_channels: The number of audio channels in the stream.
|
||||
StreamConfig(int sample_rate_hz = 0,
|
||||
size_t num_channels = 0) // NOLINT(runtime/explicit)
|
||||
: sample_rate_hz_(sample_rate_hz),
|
||||
num_channels_(num_channels),
|
||||
num_frames_(calculate_frames(sample_rate_hz)) {}
|
||||
|
||||
void set_sample_rate_hz(int value) {
|
||||
sample_rate_hz_ = value;
|
||||
num_frames_ = calculate_frames(value);
|
||||
}
|
||||
void set_num_channels(size_t value) { num_channels_ = value; }
|
||||
|
||||
int sample_rate_hz() const { return sample_rate_hz_; }
|
||||
|
||||
// The number of channels in the stream.
|
||||
size_t num_channels() const { return num_channels_; }
|
||||
|
||||
size_t num_frames() const { return num_frames_; }
|
||||
size_t num_samples() const { return num_channels_ * num_frames_; }
|
||||
|
||||
bool operator==(const StreamConfig& other) const {
|
||||
return sample_rate_hz_ == other.sample_rate_hz_ &&
|
||||
num_channels_ == other.num_channels_;
|
||||
}
|
||||
|
||||
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
|
||||
|
||||
private:
|
||||
static size_t calculate_frames(int sample_rate_hz) {
|
||||
return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
|
||||
}
|
||||
|
||||
int sample_rate_hz_;
|
||||
size_t num_channels_;
|
||||
size_t num_frames_;
|
||||
};
|
||||
|
||||
class ProcessingConfig {
|
||||
public:
|
||||
enum StreamName {
|
||||
kInputStream,
|
||||
kOutputStream,
|
||||
kReverseInputStream,
|
||||
kReverseOutputStream,
|
||||
kNumStreamNames,
|
||||
};
|
||||
|
||||
const StreamConfig& input_stream() const {
|
||||
return streams[StreamName::kInputStream];
|
||||
}
|
||||
const StreamConfig& output_stream() const {
|
||||
return streams[StreamName::kOutputStream];
|
||||
}
|
||||
const StreamConfig& reverse_input_stream() const {
|
||||
return streams[StreamName::kReverseInputStream];
|
||||
}
|
||||
const StreamConfig& reverse_output_stream() const {
|
||||
return streams[StreamName::kReverseOutputStream];
|
||||
}
|
||||
|
||||
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
|
||||
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
|
||||
StreamConfig& reverse_input_stream() {
|
||||
return streams[StreamName::kReverseInputStream];
|
||||
}
|
||||
StreamConfig& reverse_output_stream() {
|
||||
return streams[StreamName::kReverseOutputStream];
|
||||
}
|
||||
|
||||
bool operator==(const ProcessingConfig& other) const {
|
||||
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
|
||||
if (this->streams[i] != other.streams[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool operator!=(const ProcessingConfig& other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
StreamConfig streams[StreamName::kNumStreamNames];
|
||||
};
|
||||
|
||||
// Interface for an echo detector submodule.
|
||||
class EchoDetector : public RefCountInterface {
|
||||
public:
|
||||
// (Re-)Initializes the submodule.
|
||||
virtual void Initialize(int capture_sample_rate_hz,
|
||||
int num_capture_channels,
|
||||
int render_sample_rate_hz,
|
||||
int num_render_channels) = 0;
|
||||
|
||||
// Analysis (not changing) of the first channel of the render signal.
|
||||
virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;
|
||||
|
||||
// Analysis (not changing) of the capture signal.
|
||||
virtual void AnalyzeCaptureAudio(
|
||||
rtc::ArrayView<const float> capture_audio) = 0;
|
||||
|
||||
struct Metrics {
|
||||
absl::optional<double> echo_likelihood;
|
||||
absl::optional<double> echo_likelihood_recent_max;
|
||||
};
|
||||
|
||||
// Collect current metrics from the echo detector.
|
||||
virtual Metrics GetMetrics() const = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_AUDIO_AUDIO_PROCESSING_H_
|
22
VocieProcess/api/audio/audio_processing_statistics.cc
Normal file
22
VocieProcess/api/audio/audio_processing_statistics.cc
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio/audio_processing_statistics.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
AudioProcessingStats::AudioProcessingStats() = default;
|
||||
|
||||
AudioProcessingStats::AudioProcessingStats(const AudioProcessingStats& other) =
|
||||
default;
|
||||
|
||||
AudioProcessingStats::~AudioProcessingStats() = default;
|
||||
|
||||
} // namespace webrtc
|
67
VocieProcess/api/audio/audio_processing_statistics.h
Normal file
67
VocieProcess/api/audio/audio_processing_statistics.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
|
||||
#define API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "rtc_base/system/rtc_export.h"
|
||||
|
||||
namespace webrtc {
|
||||
// This version of the stats uses Optionals, it will replace the regular
|
||||
// AudioProcessingStatistics struct.
|
||||
struct RTC_EXPORT AudioProcessingStats {
|
||||
AudioProcessingStats();
|
||||
AudioProcessingStats(const AudioProcessingStats& other);
|
||||
~AudioProcessingStats();
|
||||
|
||||
// Deprecated.
|
||||
// TODO(bugs.webrtc.org/11226): Remove.
|
||||
// True if voice is detected in the last capture frame, after processing.
|
||||
// It is conservative in flagging audio as speech, with low likelihood of
|
||||
// incorrectly flagging a frame as voice.
|
||||
// Only reported if voice detection is enabled in AudioProcessing::Config.
|
||||
absl::optional<bool> voice_detected;
|
||||
|
||||
// AEC Statistics.
|
||||
// ERL = 10log_10(P_far / P_echo)
|
||||
absl::optional<double> echo_return_loss;
|
||||
// ERLE = 10log_10(P_echo / P_out)
|
||||
absl::optional<double> echo_return_loss_enhancement;
|
||||
// Fraction of time that the AEC linear filter is divergent, in a 1-second
|
||||
// non-overlapped aggregation window.
|
||||
absl::optional<double> divergent_filter_fraction;
|
||||
|
||||
// The delay metrics consists of the delay median and standard deviation. It
|
||||
// also consists of the fraction of delay estimates that can make the echo
|
||||
// cancellation perform poorly. The values are aggregated until the first
|
||||
// call to `GetStatistics()` and afterwards aggregated and updated every
|
||||
// second. Note that if there are several clients pulling metrics from
|
||||
// `GetStatistics()` during a session the first call from any of them will
|
||||
// change to one second aggregation window for all.
|
||||
absl::optional<int32_t> delay_median_ms;
|
||||
absl::optional<int32_t> delay_standard_deviation_ms;
|
||||
|
||||
// Residual echo detector likelihood.
|
||||
absl::optional<double> residual_echo_likelihood;
|
||||
// Maximum residual echo likelihood from the last time period.
|
||||
absl::optional<double> residual_echo_likelihood_recent_max;
|
||||
|
||||
// The instantaneous delay estimate produced in the AEC. The unit is in
|
||||
// milliseconds and the value is the instantaneous value at the time of the
|
||||
// call to `GetStatistics()`.
|
||||
absl::optional<int32_t> delay_ms;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
|
269
VocieProcess/api/audio/audio_view.h
Normal file
269
VocieProcess/api/audio/audio_view.h
Normal file
@ -0,0 +1,269 @@
|
||||
/*
|
||||
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_AUDIO_AUDIO_VIEW_H_
|
||||
#define API_AUDIO_AUDIO_VIEW_H_
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/channel_layout.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This file contains 3 types of view classes:
|
||||
//
|
||||
// * MonoView<>: A single channel contiguous buffer of samples.
|
||||
//
|
||||
// * InterleavedView<>: Channel samples are interleaved (side-by-side) in
|
||||
// the buffer. A single channel InterleavedView<> is the same thing as a
|
||||
// MonoView<>
|
||||
//
|
||||
// * DeinterleavedView<>: Each channel's samples are contiguous within the
|
||||
// buffer. Channels can be enumerated and accessing the individual channel
|
||||
// data is done via MonoView<>.
|
||||
//
|
||||
// The views are comparable to and built on rtc::ArrayView<> but add
|
||||
// audio specific properties for the dimensions of the buffer and the above
|
||||
// specialized [de]interleaved support.
|
||||
//
|
||||
// There are also a few generic utility functions that can simplify
|
||||
// generic code for supporting more than one type of view.
|
||||
|
||||
// MonoView<> represents a view over a single contiguous, audio buffer. This
|
||||
// can be either an single channel (mono) interleaved buffer (e.g. AudioFrame),
|
||||
// or a de-interleaved channel (e.g. from AudioBuffer).
|
||||
template <typename T>
|
||||
using MonoView = rtc::ArrayView<T>;
|
||||
|
||||
// InterleavedView<> is a view over an interleaved audio buffer (e.g. from
|
||||
// AudioFrame).
|
||||
template <typename T>
|
||||
class InterleavedView {
|
||||
public:
|
||||
using value_type = T;
|
||||
|
||||
InterleavedView() = default;
|
||||
|
||||
template <typename U>
|
||||
InterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
|
||||
: num_channels_(num_channels),
|
||||
samples_per_channel_(samples_per_channel),
|
||||
data_(data, num_channels * samples_per_channel) {
|
||||
RTC_DCHECK_LE(num_channels_, kMaxConcurrentChannels);
|
||||
RTC_DCHECK(num_channels_ == 0u || samples_per_channel_ != 0u);
|
||||
}
|
||||
|
||||
// Construct an InterleavedView from a C-style array. Samples per channels
|
||||
// is calculated based on the array size / num_channels.
|
||||
template <typename U, size_t N>
|
||||
InterleavedView(U (&array)[N], // NOLINT
|
||||
size_t num_channels)
|
||||
: InterleavedView(array, N / num_channels, num_channels) {
|
||||
RTC_DCHECK_EQ(N % num_channels, 0u);
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
InterleavedView(const InterleavedView<U>& other)
|
||||
: num_channels_(other.num_channels()),
|
||||
samples_per_channel_(other.samples_per_channel()),
|
||||
data_(other.data()) {}
|
||||
|
||||
size_t num_channels() const { return num_channels_; }
|
||||
size_t samples_per_channel() const { return samples_per_channel_; }
|
||||
rtc::ArrayView<T> data() const { return data_; }
|
||||
bool empty() const { return data_.empty(); }
|
||||
size_t size() const { return data_.size(); }
|
||||
|
||||
MonoView<T> AsMono() const {
|
||||
RTC_DCHECK_EQ(num_channels(), 1u);
|
||||
RTC_DCHECK_EQ(data_.size(), samples_per_channel_);
|
||||
return data_;
|
||||
}
|
||||
|
||||
// A simple wrapper around memcpy that includes checks for properties.
|
||||
// TODO(tommi): Consider if this can be utility function for both interleaved
|
||||
// and deinterleaved views.
|
||||
template <typename U>
|
||||
void CopyFrom(const InterleavedView<U>& source) {
|
||||
static_assert(sizeof(T) == sizeof(U), "");
|
||||
RTC_DCHECK_EQ(num_channels(), source.num_channels());
|
||||
RTC_DCHECK_EQ(samples_per_channel(), source.samples_per_channel());
|
||||
RTC_DCHECK_GE(data_.size(), source.data().size());
|
||||
const auto data = source.data();
|
||||
memcpy(&data_[0], &data[0], data.size() * sizeof(U));
|
||||
}
|
||||
|
||||
T& operator[](size_t idx) const { return data_[idx]; }
|
||||
T* begin() const { return data_.begin(); }
|
||||
T* end() const { return data_.end(); }
|
||||
const T* cbegin() const { return data_.cbegin(); }
|
||||
const T* cend() const { return data_.cend(); }
|
||||
std::reverse_iterator<T*> rbegin() const { return data_.rbegin(); }
|
||||
std::reverse_iterator<T*> rend() const { return data_.rend(); }
|
||||
std::reverse_iterator<const T*> crbegin() const { return data_.crbegin(); }
|
||||
std::reverse_iterator<const T*> crend() const { return data_.crend(); }
|
||||
|
||||
private:
|
||||
// TODO(tommi): Consider having these both be stored as uint16_t to
|
||||
// save a few bytes per view. Use `dchecked_cast` to support size_t during
|
||||
// construction.
|
||||
size_t num_channels_ = 0u;
|
||||
size_t samples_per_channel_ = 0u;
|
||||
rtc::ArrayView<T> data_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class DeinterleavedView {
|
||||
public:
|
||||
using value_type = T;
|
||||
|
||||
DeinterleavedView() = default;
|
||||
|
||||
template <typename U>
|
||||
DeinterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
|
||||
: num_channels_(num_channels),
|
||||
samples_per_channel_(samples_per_channel),
|
||||
data_(data, num_channels * samples_per_channel_) {}
|
||||
|
||||
template <typename U>
|
||||
DeinterleavedView(const DeinterleavedView<U>& other)
|
||||
: num_channels_(other.num_channels()),
|
||||
samples_per_channel_(other.samples_per_channel()),
|
||||
data_(other.data()) {}
|
||||
|
||||
// Returns a deinterleaved channel where `idx` is the zero based index,
|
||||
// in the range [0 .. num_channels()-1].
|
||||
MonoView<T> operator[](size_t idx) const {
|
||||
RTC_DCHECK_LT(idx, num_channels_);
|
||||
return MonoView<T>(&data_[idx * samples_per_channel_],
|
||||
samples_per_channel_);
|
||||
}
|
||||
|
||||
size_t num_channels() const { return num_channels_; }
|
||||
size_t samples_per_channel() const { return samples_per_channel_; }
|
||||
rtc::ArrayView<T> data() const { return data_; }
|
||||
bool empty() const { return data_.empty(); }
|
||||
size_t size() const { return data_.size(); }
|
||||
|
||||
// Returns the first (and possibly only) channel.
|
||||
MonoView<T> AsMono() const {
|
||||
RTC_DCHECK_GE(num_channels(), 1u);
|
||||
return (*this)[0];
|
||||
}
|
||||
|
||||
private:
|
||||
// TODO(tommi): Consider having these be stored as uint16_t to save a few
|
||||
// bytes per view. Use `dchecked_cast` to support size_t during construction.
|
||||
size_t num_channels_ = 0u;
|
||||
size_t samples_per_channel_ = 0u;
|
||||
rtc::ArrayView<T> data_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
constexpr size_t NumChannels(const MonoView<T>& view) {
|
||||
return 1u;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t NumChannels(const InterleavedView<T>& view) {
|
||||
return view.num_channels();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t NumChannels(const DeinterleavedView<T>& view) {
|
||||
return view.num_channels();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool IsMono(const MonoView<T>& view) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool IsInterleavedView(const MonoView<T>& view) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool IsInterleavedView(const InterleavedView<T>& view) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool IsInterleavedView(const DeinterleavedView<const T>& view) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool IsMono(const InterleavedView<T>& view) {
|
||||
return NumChannels(view) == 1u;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool IsMono(const DeinterleavedView<T>& view) {
|
||||
return NumChannels(view) == 1u;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t SamplesPerChannel(const MonoView<T>& view) {
|
||||
return view.size();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t SamplesPerChannel(const InterleavedView<T>& view) {
|
||||
return view.samples_per_channel();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t SamplesPerChannel(const DeinterleavedView<T>& view) {
|
||||
return view.samples_per_channel();
|
||||
}
|
||||
// A simple wrapper around memcpy that includes checks for properties.
|
||||
// The parameter order is the same as for memcpy(), first destination then
|
||||
// source.
|
||||
template <typename D, typename S>
|
||||
void CopySamples(D& destination, const S& source) {
|
||||
static_assert(
|
||||
sizeof(typename D::value_type) == sizeof(typename S::value_type), "");
|
||||
// Here we'd really like to do
|
||||
// static_assert(IsInterleavedView(destination) == IsInterleavedView(source),
|
||||
// "");
|
||||
// but the compiler doesn't like it inside this template function for
|
||||
// some reason. The following check is an approximation but unfortunately
|
||||
// means that copying between a MonoView and single channel interleaved or
|
||||
// deinterleaved views wouldn't work.
|
||||
// static_assert(sizeof(destination) == sizeof(source),
|
||||
// "Incompatible view types");
|
||||
RTC_DCHECK_EQ(NumChannels(destination), NumChannels(source));
|
||||
RTC_DCHECK_EQ(SamplesPerChannel(destination), SamplesPerChannel(source));
|
||||
RTC_DCHECK_GE(destination.size(), source.size());
|
||||
memcpy(&destination[0], &source[0],
|
||||
source.size() * sizeof(typename S::value_type));
|
||||
}
|
||||
|
||||
// Sets all the samples in a view to 0. This template function is a simple
|
||||
// wrapper around `memset()` but adds the benefit of automatically calculating
|
||||
// the byte size from the number of samples and sample type.
|
||||
template <typename T>
|
||||
void ClearSamples(T& view) {
|
||||
memset(&view[0], 0, view.size() * sizeof(typename T::value_type));
|
||||
}
|
||||
|
||||
// Same as `ClearSamples()` above but allows for clearing only the first
|
||||
// `sample_count` number of samples.
|
||||
template <typename T>
|
||||
void ClearSamples(T& view, size_t sample_count) {
|
||||
RTC_DCHECK_LE(sample_count, view.size());
|
||||
memset(&view[0], 0, sample_count * sizeof(typename T::value_type));
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_AUDIO_AUDIO_VIEW_H_
|
282
VocieProcess/api/audio/channel_layout.cc
Normal file
282
VocieProcess/api/audio/channel_layout.cc
Normal file
@ -0,0 +1,282 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio/channel_layout.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kLayoutToChannels[] = {
|
||||
0, // CHANNEL_LAYOUT_NONE
|
||||
0, // CHANNEL_LAYOUT_UNSUPPORTED
|
||||
1, // CHANNEL_LAYOUT_MONO
|
||||
2, // CHANNEL_LAYOUT_STEREO
|
||||
3, // CHANNEL_LAYOUT_2_1
|
||||
3, // CHANNEL_LAYOUT_SURROUND
|
||||
4, // CHANNEL_LAYOUT_4_0
|
||||
4, // CHANNEL_LAYOUT_2_2
|
||||
4, // CHANNEL_LAYOUT_QUAD
|
||||
5, // CHANNEL_LAYOUT_5_0
|
||||
6, // CHANNEL_LAYOUT_5_1
|
||||
5, // CHANNEL_LAYOUT_5_0_BACK
|
||||
6, // CHANNEL_LAYOUT_5_1_BACK
|
||||
7, // CHANNEL_LAYOUT_7_0
|
||||
8, // CHANNEL_LAYOUT_7_1
|
||||
8, // CHANNEL_LAYOUT_7_1_WIDE
|
||||
2, // CHANNEL_LAYOUT_STEREO_DOWNMIX
|
||||
3, // CHANNEL_LAYOUT_2POINT1
|
||||
4, // CHANNEL_LAYOUT_3_1
|
||||
5, // CHANNEL_LAYOUT_4_1
|
||||
6, // CHANNEL_LAYOUT_6_0
|
||||
6, // CHANNEL_LAYOUT_6_0_FRONT
|
||||
6, // CHANNEL_LAYOUT_HEXAGONAL
|
||||
7, // CHANNEL_LAYOUT_6_1
|
||||
7, // CHANNEL_LAYOUT_6_1_BACK
|
||||
7, // CHANNEL_LAYOUT_6_1_FRONT
|
||||
7, // CHANNEL_LAYOUT_7_0_FRONT
|
||||
8, // CHANNEL_LAYOUT_7_1_WIDE_BACK
|
||||
8, // CHANNEL_LAYOUT_OCTAGONAL
|
||||
0, // CHANNEL_LAYOUT_DISCRETE
|
||||
3, // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
|
||||
5, // CHANNEL_LAYOUT_4_1_QUAD_SIDE
|
||||
0, // CHANNEL_LAYOUT_BITSTREAM
|
||||
};
|
||||
|
||||
// The channel orderings for each layout as specified by FFmpeg. Each value
// represents the index of each channel in each layout. Values of -1 mean the
// channel at that index is not used for that layout. For example, the left side
// surround sound channel in FFmpeg's 5.1 layout is in the 5th position (because
// the order is L, R, C, LFE, LS, RS), so
// kChannelOrderings[CHANNEL_LAYOUT_5_1][SIDE_LEFT] = 4;
//
// Rows are indexed by ChannelLayout, columns by the Channels enum; the column
// header below repeats every few rows for readability.
static const int kChannelOrderings[CHANNEL_LAYOUT_MAX + 1][CHANNELS_MAX + 1] = {
    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR

    // CHANNEL_LAYOUT_NONE
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_UNSUPPORTED
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_MONO
    {-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_STEREO
    {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_2_1
    {0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1},

    // CHANNEL_LAYOUT_SURROUND
    {0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_4_0
    {0, 1, 2, -1, -1, -1, -1, -1, 3, -1, -1},

    // CHANNEL_LAYOUT_2_2
    {0, 1, -1, -1, -1, -1, -1, -1, -1, 2, 3},

    // CHANNEL_LAYOUT_QUAD
    {0, 1, -1, -1, 2, 3, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_5_0
    {0, 1, 2, -1, -1, -1, -1, -1, -1, 3, 4},

    // CHANNEL_LAYOUT_5_1
    {0, 1, 2, 3, -1, -1, -1, -1, -1, 4, 5},

    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR

    // CHANNEL_LAYOUT_5_0_BACK
    {0, 1, 2, -1, 3, 4, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_5_1_BACK
    {0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_7_0
    {0, 1, 2, -1, 5, 6, -1, -1, -1, 3, 4},

    // CHANNEL_LAYOUT_7_1
    {0, 1, 2, 3, 6, 7, -1, -1, -1, 4, 5},

    // CHANNEL_LAYOUT_7_1_WIDE
    {0, 1, 2, 3, -1, -1, 6, 7, -1, 4, 5},

    // CHANNEL_LAYOUT_STEREO_DOWNMIX
    {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_2POINT1
    {0, 1, -1, 2, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_3_1
    {0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_4_1
    {0, 1, 2, 4, -1, -1, -1, -1, 3, -1, -1},

    // CHANNEL_LAYOUT_6_0
    {0, 1, 2, -1, -1, -1, -1, -1, 5, 3, 4},

    // CHANNEL_LAYOUT_6_0_FRONT
    {0, 1, -1, -1, -1, -1, 4, 5, -1, 2, 3},

    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR

    // CHANNEL_LAYOUT_HEXAGONAL
    {0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1},

    // CHANNEL_LAYOUT_6_1
    {0, 1, 2, 3, -1, -1, -1, -1, 6, 4, 5},

    // CHANNEL_LAYOUT_6_1_BACK
    {0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1},

    // CHANNEL_LAYOUT_6_1_FRONT
    {0, 1, -1, 6, -1, -1, 4, 5, -1, 2, 3},

    // CHANNEL_LAYOUT_7_0_FRONT
    {0, 1, 2, -1, -1, -1, 5, 6, -1, 3, 4},

    // CHANNEL_LAYOUT_7_1_WIDE_BACK
    {0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1},

    // CHANNEL_LAYOUT_OCTAGONAL
    {0, 1, 2, -1, 5, 6, -1, -1, 7, 3, 4},

    // CHANNEL_LAYOUT_DISCRETE
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
    {0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_4_1_QUAD_SIDE
    {0, 1, -1, 4, -1, -1, -1, -1, -1, 2, 3},

    // CHANNEL_LAYOUT_BITSTREAM
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
};
|
||||
|
||||
// Maps `layout` to its channel count via the kLayoutToChannels table.
int ChannelLayoutToChannelCount(ChannelLayout layout) {
  RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kLayoutToChannels));
  const int channel_count = kLayoutToChannels[layout];
  // No layout may advertise more channels than the documented upper bound.
  RTC_DCHECK_LE(channel_count, kMaxConcurrentChannels);
  return channel_count;
}
|
||||
|
||||
// Converts a channel count into a channel layout.
|
||||
ChannelLayout GuessChannelLayout(int channels) {
|
||||
switch (channels) {
|
||||
case 1:
|
||||
return CHANNEL_LAYOUT_MONO;
|
||||
case 2:
|
||||
return CHANNEL_LAYOUT_STEREO;
|
||||
case 3:
|
||||
return CHANNEL_LAYOUT_SURROUND;
|
||||
case 4:
|
||||
return CHANNEL_LAYOUT_QUAD;
|
||||
case 5:
|
||||
return CHANNEL_LAYOUT_5_0;
|
||||
case 6:
|
||||
return CHANNEL_LAYOUT_5_1;
|
||||
case 7:
|
||||
return CHANNEL_LAYOUT_6_1;
|
||||
case 8:
|
||||
return CHANNEL_LAYOUT_7_1;
|
||||
default:
|
||||
RTC_DLOG(LS_WARNING) << "Unsupported channel count: " << channels;
|
||||
}
|
||||
return CHANNEL_LAYOUT_UNSUPPORTED;
|
||||
}
|
||||
|
||||
// Returns the interleaved position of `channel` within `layout`, or -1 when
// that layout does not contain the channel.
int ChannelOrder(ChannelLayout layout, Channels channel) {
  const size_t layout_index = static_cast<size_t>(layout);
  const size_t channel_index = static_cast<size_t>(channel);
  RTC_DCHECK_LT(layout_index, arraysize(kChannelOrderings));
  RTC_DCHECK_LT(channel_index, arraysize(kChannelOrderings[0]));
  return kChannelOrderings[layout_index][channel_index];
}
|
||||
|
||||
// Returns a human-readable name for `layout`; "" (after a DCHECK failure in
// debug builds) for values outside the enum.
const char* ChannelLayoutToString(ChannelLayout layout) {
  switch (layout) {
    case CHANNEL_LAYOUT_NONE: return "NONE";
    case CHANNEL_LAYOUT_UNSUPPORTED: return "UNSUPPORTED";
    case CHANNEL_LAYOUT_MONO: return "MONO";
    case CHANNEL_LAYOUT_STEREO: return "STEREO";
    case CHANNEL_LAYOUT_2_1: return "2.1";
    case CHANNEL_LAYOUT_SURROUND: return "SURROUND";
    case CHANNEL_LAYOUT_4_0: return "4.0";
    case CHANNEL_LAYOUT_2_2: return "QUAD_SIDE";
    case CHANNEL_LAYOUT_QUAD: return "QUAD";
    case CHANNEL_LAYOUT_5_0: return "5.0";
    case CHANNEL_LAYOUT_5_1: return "5.1";
    case CHANNEL_LAYOUT_5_0_BACK: return "5.0_BACK";
    case CHANNEL_LAYOUT_5_1_BACK: return "5.1_BACK";
    case CHANNEL_LAYOUT_7_0: return "7.0";
    case CHANNEL_LAYOUT_7_1: return "7.1";
    case CHANNEL_LAYOUT_7_1_WIDE: return "7.1_WIDE";
    case CHANNEL_LAYOUT_STEREO_DOWNMIX: return "STEREO_DOWNMIX";
    case CHANNEL_LAYOUT_2POINT1: return "2POINT1";
    case CHANNEL_LAYOUT_3_1: return "3.1";
    case CHANNEL_LAYOUT_4_1: return "4.1";
    case CHANNEL_LAYOUT_6_0: return "6.0";
    case CHANNEL_LAYOUT_6_0_FRONT: return "6.0_FRONT";
    case CHANNEL_LAYOUT_HEXAGONAL: return "HEXAGONAL";
    case CHANNEL_LAYOUT_6_1: return "6.1";
    case CHANNEL_LAYOUT_6_1_BACK: return "6.1_BACK";
    case CHANNEL_LAYOUT_6_1_FRONT: return "6.1_FRONT";
    case CHANNEL_LAYOUT_7_0_FRONT: return "7.0_FRONT";
    case CHANNEL_LAYOUT_7_1_WIDE_BACK: return "7.1_WIDE_BACK";
    case CHANNEL_LAYOUT_OCTAGONAL: return "OCTAGONAL";
    case CHANNEL_LAYOUT_DISCRETE: return "DISCRETE";
    case CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC: return "STEREO_AND_KEYBOARD_MIC";
    case CHANNEL_LAYOUT_4_1_QUAD_SIDE: return "4.1_QUAD_SIDE";
    case CHANNEL_LAYOUT_BITSTREAM: return "BITSTREAM";
  }
  RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
  return "";
}
|
||||
|
||||
} // namespace webrtc
|
165
VocieProcess/api/audio/channel_layout.h
Normal file
165
VocieProcess/api/audio/channel_layout.h
Normal file
@ -0,0 +1,165 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_AUDIO_CHANNEL_LAYOUT_H_
|
||||
#define API_AUDIO_CHANNEL_LAYOUT_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This file is derived from Chromium's base/channel_layout.h.
|
||||
|
||||
// Enumerates the various representations of the ordering of audio channels.
// Logged to UMA, so never reuse a value, always add new/greater ones!
// Each comment lists the channels, in interleaved order, that the layout
// carries (L/R = left/right, C = center, LofC/RofC = left/right of center).
enum ChannelLayout {
  CHANNEL_LAYOUT_NONE = 0,
  CHANNEL_LAYOUT_UNSUPPORTED = 1,

  // Front C
  CHANNEL_LAYOUT_MONO = 2,

  // Front L, Front R
  CHANNEL_LAYOUT_STEREO = 3,

  // Front L, Front R, Back C
  CHANNEL_LAYOUT_2_1 = 4,

  // Front L, Front R, Front C
  CHANNEL_LAYOUT_SURROUND = 5,

  // Front L, Front R, Front C, Back C
  CHANNEL_LAYOUT_4_0 = 6,

  // Front L, Front R, Side L, Side R
  CHANNEL_LAYOUT_2_2 = 7,

  // Front L, Front R, Back L, Back R
  CHANNEL_LAYOUT_QUAD = 8,

  // Front L, Front R, Front C, Side L, Side R
  CHANNEL_LAYOUT_5_0 = 9,

  // Front L, Front R, Front C, LFE, Side L, Side R
  CHANNEL_LAYOUT_5_1 = 10,

  // Front L, Front R, Front C, Back L, Back R
  CHANNEL_LAYOUT_5_0_BACK = 11,

  // Front L, Front R, Front C, LFE, Back L, Back R
  CHANNEL_LAYOUT_5_1_BACK = 12,

  // Front L, Front R, Front C, Side L, Side R, Back L, Back R
  CHANNEL_LAYOUT_7_0 = 13,

  // Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
  CHANNEL_LAYOUT_7_1 = 14,

  // Front L, Front R, Front C, LFE, Side L, Side R, Front LofC, Front RofC
  CHANNEL_LAYOUT_7_1_WIDE = 15,

  // Stereo L, Stereo R
  CHANNEL_LAYOUT_STEREO_DOWNMIX = 16,

  // Stereo L, Stereo R, LFE
  CHANNEL_LAYOUT_2POINT1 = 17,

  // Stereo L, Stereo R, Front C, LFE
  CHANNEL_LAYOUT_3_1 = 18,

  // Stereo L, Stereo R, Front C, Rear C, LFE
  CHANNEL_LAYOUT_4_1 = 19,

  // Stereo L, Stereo R, Front C, Side L, Side R, Back C
  CHANNEL_LAYOUT_6_0 = 20,

  // Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC
  CHANNEL_LAYOUT_6_0_FRONT = 21,

  // Stereo L, Stereo R, Front C, Rear L, Rear R, Rear C
  CHANNEL_LAYOUT_HEXAGONAL = 22,

  // Stereo L, Stereo R, Front C, LFE, Side L, Side R, Rear Center
  CHANNEL_LAYOUT_6_1 = 23,

  // Stereo L, Stereo R, Front C, LFE, Back L, Back R, Rear Center
  CHANNEL_LAYOUT_6_1_BACK = 24,

  // Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC, LFE
  CHANNEL_LAYOUT_6_1_FRONT = 25,

  // Front L, Front R, Front C, Side L, Side R, Front LofC, Front RofC
  CHANNEL_LAYOUT_7_0_FRONT = 26,

  // Front L, Front R, Front C, LFE, Back L, Back R, Front LofC, Front RofC
  CHANNEL_LAYOUT_7_1_WIDE_BACK = 27,

  // Front L, Front R, Front C, Side L, Side R, Rear L, Back R, Back C.
  CHANNEL_LAYOUT_OCTAGONAL = 28,

  // Channels are not explicitly mapped to speakers.
  CHANNEL_LAYOUT_DISCRETE = 29,

  // Front L, Front R, Front C. Front C contains the keyboard mic audio. This
  // layout is only intended for input for WebRTC. The Front C channel
  // is stripped away in the WebRTC audio input pipeline and never seen outside
  // of that.
  CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC = 30,

  // Front L, Front R, Side L, Side R, LFE
  CHANNEL_LAYOUT_4_1_QUAD_SIDE = 31,

  // Actual channel layout is specified in the bitstream and the actual channel
  // count is unknown at Chromium media pipeline level (useful for audio
  // pass-through mode).
  CHANNEL_LAYOUT_BITSTREAM = 32,

  // Max value, must always equal the largest entry ever logged.
  CHANNEL_LAYOUT_MAX = CHANNEL_LAYOUT_BITSTREAM
};
|
||||
|
||||
// Note: Do not reorder or reassign these values; other code depends on their
// ordering to operate correctly. E.g., CoreAudio channel layout computations.
// These values are also used as column indices into the per-layout channel
// ordering tables (see ChannelOrder()).
enum Channels {
  LEFT = 0,
  RIGHT,
  CENTER,
  LFE,
  BACK_LEFT,
  BACK_RIGHT,
  LEFT_OF_CENTER,
  RIGHT_OF_CENTER,
  BACK_CENTER,
  SIDE_LEFT,
  SIDE_RIGHT,
  CHANNELS_MAX =
      SIDE_RIGHT,  // Must always equal the largest value ever logged.
};
|
||||
|
||||
// The maximum number of concurrently active channels for all possible layouts.
// ChannelLayoutToChannelCount() will never return a value higher than this.
constexpr int kMaxConcurrentChannels = 8;

// Returns the expected channel position in an interleaved stream. Values of -1
// mean the channel at that index is not used for that layout. Values range
// from 0 to ChannelLayoutToChannelCount(layout) - 1.
int ChannelOrder(ChannelLayout layout, Channels channel);

// Returns the number of channels in a given ChannelLayout.
int ChannelLayoutToChannelCount(ChannelLayout layout);

// Given the number of channels, return the best layout,
// or return CHANNEL_LAYOUT_UNSUPPORTED if there is no good match.
ChannelLayout GuessChannelLayout(int channels);

// Returns a string representation of the channel layout (e.g. "STEREO").
const char* ChannelLayoutToString(ChannelLayout layout);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_AUDIO_CHANNEL_LAYOUT_H_
|
278
VocieProcess/api/audio/echo_canceller3_config.cc
Normal file
278
VocieProcess/api/audio/echo_canceller3_config.cc
Normal file
@ -0,0 +1,278 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
bool Limit(float* value, float min, float max) {
|
||||
float clamped = rtc::SafeClamp(*value, min, max);
|
||||
clamped = std::isfinite(clamped) ? clamped : min;
|
||||
bool res = *value == clamped;
|
||||
*value = clamped;
|
||||
return res;
|
||||
}
|
||||
|
||||
bool Limit(size_t* value, size_t min, size_t max) {
|
||||
size_t clamped = rtc::SafeClamp(*value, min, max);
|
||||
bool res = *value == clamped;
|
||||
*value = clamped;
|
||||
return res;
|
||||
}
|
||||
|
||||
bool Limit(int* value, int min, int max) {
|
||||
int clamped = rtc::SafeClamp(*value, min, max);
|
||||
bool res = *value == clamped;
|
||||
*value = clamped;
|
||||
return res;
|
||||
}
|
||||
|
||||
// Raises `*value` to at least `min`. Returns true iff no change was needed.
bool FloorLimit(size_t* value, size_t min) {
  const bool within_range = (*value >= min);
  if (!within_range) {
    *value = min;
  }
  return within_range;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Out-of-line definitions of the special members declared in
// echo_canceller3_config.h. All are defaulted except the two value
// constructors (MaskingThresholds and Tuning), which simply copy their
// arguments into the corresponding fields.
EchoCanceller3Config::EchoCanceller3Config() = default;
EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
    default;
EchoCanceller3Config& EchoCanceller3Config::operator=(
    const EchoCanceller3Config& e) = default;
EchoCanceller3Config::Delay::Delay() = default;
EchoCanceller3Config::Delay::Delay(const EchoCanceller3Config::Delay& e) =
    default;
EchoCanceller3Config::Delay& EchoCanceller3Config::Delay::operator=(
    const Delay& e) = default;

EchoCanceller3Config::EchoModel::EchoModel() = default;
EchoCanceller3Config::EchoModel::EchoModel(
    const EchoCanceller3Config::EchoModel& e) = default;
EchoCanceller3Config::EchoModel& EchoCanceller3Config::EchoModel::operator=(
    const EchoModel& e) = default;

EchoCanceller3Config::Suppressor::Suppressor() = default;
EchoCanceller3Config::Suppressor::Suppressor(
    const EchoCanceller3Config::Suppressor& e) = default;
EchoCanceller3Config::Suppressor& EchoCanceller3Config::Suppressor::operator=(
    const Suppressor& e) = default;

// Value constructor: stores the transparent/suppress thresholds verbatim.
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
    float enr_transparent,
    float enr_suppress,
    float emr_transparent)
    : enr_transparent(enr_transparent),
      enr_suppress(enr_suppress),
      emr_transparent(emr_transparent) {}
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
    const EchoCanceller3Config::Suppressor::MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds&
EchoCanceller3Config::Suppressor::MaskingThresholds::operator=(
    const MaskingThresholds& e) = default;

// Value constructor: bundles low/high-frequency masks with gain-change limits.
EchoCanceller3Config::Suppressor::Tuning::Tuning(MaskingThresholds mask_lf,
                                                 MaskingThresholds mask_hf,
                                                 float max_inc_factor,
                                                 float max_dec_factor_lf)
    : mask_lf(mask_lf),
      mask_hf(mask_hf),
      max_inc_factor(max_inc_factor),
      max_dec_factor_lf(max_dec_factor_lf) {}
EchoCanceller3Config::Suppressor::Tuning::Tuning(
    const EchoCanceller3Config::Suppressor::Tuning& e) = default;
EchoCanceller3Config::Suppressor::Tuning&
EchoCanceller3Config::Suppressor::Tuning::operator=(const Tuning& e) = default;
|
||||
|
||||
// Clamps every field of `config` into its supported range and returns true
// iff no field needed adjustment. Note the deliberate use of bitwise `&`
// (not `&&`): it avoids short-circuiting, so every Limit()/FloorLimit() call
// still runs and clamps its field even after `res` has become false.
bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
  RTC_DCHECK(config);
  EchoCanceller3Config* c = config;
  bool res = true;

  // Delay estimation parameters.
  if (c->delay.down_sampling_factor != 4 &&
      c->delay.down_sampling_factor != 8) {
    c->delay.down_sampling_factor = 4;
    res = false;
  }

  res = res & Limit(&c->delay.default_delay, 0, 5000);
  res = res & Limit(&c->delay.num_filters, 0, 5000);
  res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
  res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
  res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
  res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
  res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
  res = res & Limit(&c->delay.delay_selection_thresholds.initial, 1, 250);
  res = res & Limit(&c->delay.delay_selection_thresholds.converged, 1, 250);

  // Refined adaptive filter.
  res = res & FloorLimit(&c->filter.refined.length_blocks, 1);
  res = res & Limit(&c->filter.refined.leakage_converged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined.leakage_diverged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined.error_floor, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined.error_ceil, 0.f, 100000000.f);
  res = res & Limit(&c->filter.refined.noise_gate, 0.f, 100000000.f);

  res = res & FloorLimit(&c->filter.refined_initial.length_blocks, 1);
  res = res & Limit(&c->filter.refined_initial.leakage_converged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined_initial.leakage_diverged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined_initial.error_floor, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined_initial.error_ceil, 0.f, 100000000.f);
  res = res & Limit(&c->filter.refined_initial.noise_gate, 0.f, 100000000.f);

  // The initial filter may not be longer than the converged filter.
  if (c->filter.refined.length_blocks <
      c->filter.refined_initial.length_blocks) {
    c->filter.refined_initial.length_blocks = c->filter.refined.length_blocks;
    res = false;
  }

  // Coarse adaptive filter.
  res = res & FloorLimit(&c->filter.coarse.length_blocks, 1);
  res = res & Limit(&c->filter.coarse.rate, 0.f, 1.f);
  res = res & Limit(&c->filter.coarse.noise_gate, 0.f, 100000000.f);

  res = res & FloorLimit(&c->filter.coarse_initial.length_blocks, 1);
  res = res & Limit(&c->filter.coarse_initial.rate, 0.f, 1.f);
  res = res & Limit(&c->filter.coarse_initial.noise_gate, 0.f, 100000000.f);

  // Same initial-vs-converged length constraint for the coarse filter.
  if (c->filter.coarse.length_blocks < c->filter.coarse_initial.length_blocks) {
    c->filter.coarse_initial.length_blocks = c->filter.coarse.length_blocks;
    res = false;
  }

  res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
  res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
  res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);

  // ERLE estimator: min must not exceed either band maximum.
  res = res & Limit(&c->erle.min, 1.f, 100000.f);
  res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
  res = res & Limit(&c->erle.max_h, 1.f, 100000.f);
  if (c->erle.min > c->erle.max_l || c->erle.min > c->erle.max_h) {
    c->erle.min = std::min(c->erle.max_l, c->erle.max_h);
    res = false;
  }
  res = res & Limit(&c->erle.num_sections, 1, c->filter.refined.length_blocks);

  res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
  res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
  res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);

  // Power-domain limits use 32768^2, i.e. the square of full-scale int16.
  res =
      res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
  res = res &
        Limit(&c->echo_audibility.normal_render_limit, 0.f, 32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.floor_power, 0.f, 32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.audibility_threshold_lf, 0.f,
                    32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.audibility_threshold_mf, 0.f,
                    32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.audibility_threshold_hf, 0.f,
                    32768.f * 32768.f);

  res = res &
        Limit(&c->render_levels.active_render_limit, 0.f, 32768.f * 32768.f);
  res = res & Limit(&c->render_levels.poor_excitation_render_limit, 0.f,
                    32768.f * 32768.f);
  res = res & Limit(&c->render_levels.poor_excitation_render_limit_ds8, 0.f,
                    32768.f * 32768.f);

  // Echo model.
  res = res & Limit(&c->echo_model.noise_floor_hold, 0, 1000);
  res = res & Limit(&c->echo_model.min_noise_floor_power, 0, 2000000.f);
  res = res & Limit(&c->echo_model.stationary_gate_slope, 0, 1000000.f);
  res = res & Limit(&c->echo_model.noise_gate_power, 0, 1000000.f);
  res = res & Limit(&c->echo_model.noise_gate_slope, 0, 1000000.f);
  res = res & Limit(&c->echo_model.render_pre_window_size, 0, 100);
  res = res & Limit(&c->echo_model.render_post_window_size, 0, 100);

  res = res & Limit(&c->comfort_noise.noise_floor_dbfs, -200.f, 0.f);

  // Suppressor.
  res = res & Limit(&c->suppressor.nearend_average_blocks, 1, 5000);

  res = res &
        Limit(&c->suppressor.normal_tuning.mask_lf.enr_transparent, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_lf.enr_suppress, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_lf.emr_transparent, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_hf.enr_transparent, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_hf.enr_suppress, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_hf.emr_transparent, 0.f, 100.f);
  res = res & Limit(&c->suppressor.normal_tuning.max_inc_factor, 0.f, 100.f);
  res = res & Limit(&c->suppressor.normal_tuning.max_dec_factor_lf, 0.f, 100.f);

  res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.enr_transparent, 0.f,
                    100.f);
  res = res &
        Limit(&c->suppressor.nearend_tuning.mask_lf.enr_suppress, 0.f, 100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.emr_transparent, 0.f,
                    100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.enr_transparent, 0.f,
                    100.f);
  res = res &
        Limit(&c->suppressor.nearend_tuning.mask_hf.enr_suppress, 0.f, 100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.emr_transparent, 0.f,
                    100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.max_inc_factor, 0.f, 100.f);
  res =
      res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);

  // Band-split points; first_hf_band must lie strictly above last_lf_band.
  res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
  res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
  res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
  res = res &
        Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);

  res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
                    0.f, 1000000.f);
  res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
                    0.f, 1000000.f);
  res = res & Limit(&c->suppressor.dominant_nearend_detection.hold_duration, 0,
                    10000);
  res = res & Limit(&c->suppressor.dominant_nearend_detection.trigger_threshold,
                    0, 10000);

  // Subband regions: each `high` bound is clamped to be >= its `low` bound.
  res = res &
        Limit(&c->suppressor.subband_nearend_detection.nearend_average_blocks,
              1, 1024);
  res =
      res & Limit(&c->suppressor.subband_nearend_detection.subband1.low, 0, 65);
  res = res & Limit(&c->suppressor.subband_nearend_detection.subband1.high,
                    c->suppressor.subband_nearend_detection.subband1.low, 65);
  res =
      res & Limit(&c->suppressor.subband_nearend_detection.subband2.low, 0, 65);
  res = res & Limit(&c->suppressor.subband_nearend_detection.subband2.high,
                    c->suppressor.subband_nearend_detection.subband2.low, 65);
  res = res & Limit(&c->suppressor.subband_nearend_detection.nearend_threshold,
                    0.f, 1.e24f);
  res = res & Limit(&c->suppressor.subband_nearend_detection.snr_threshold, 0.f,
                    1.e24f);

  res = res & Limit(&c->suppressor.high_bands_suppression.enr_threshold, 0.f,
                    1000000.f);
  res = res & Limit(&c->suppressor.high_bands_suppression.max_gain_during_echo,
                    0.f, 1.f);
  res = res & Limit(&c->suppressor.high_bands_suppression
                         .anti_howling_activation_threshold,
                    0.f, 32768.f * 32768.f);
  res = res & Limit(&c->suppressor.high_bands_suppression.anti_howling_gain,
                    0.f, 1.f);

  res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);

  return res;
}
|
||||
} // namespace webrtc
|
250
VocieProcess/api/audio/echo_canceller3_config.h
Normal file
250
VocieProcess/api/audio/echo_canceller3_config.h
Normal file
@ -0,0 +1,250 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
|
||||
#define API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
|
||||
|
||||
#include <stddef.h> // size_t
|
||||
|
||||
#include "rtc_base/system/rtc_export.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Configuration struct for EchoCanceller3
|
||||
struct RTC_EXPORT EchoCanceller3Config {
|
||||
// Checks and updates the config parameters to lie within (mostly) reasonable
|
||||
  // ranges. Returns true if and only if the config did not need to be changed.
|
||||
static bool Validate(EchoCanceller3Config* config);
|
||||
|
||||
EchoCanceller3Config();
|
||||
EchoCanceller3Config(const EchoCanceller3Config& e);
|
||||
EchoCanceller3Config& operator=(const EchoCanceller3Config& other);
|
||||
|
||||
struct Buffering {
|
||||
size_t excess_render_detection_interval_blocks = 250;
|
||||
size_t max_allowed_excess_render_blocks = 8;
|
||||
} buffering;
|
||||
|
||||
struct Delay {
|
||||
Delay();
|
||||
Delay(const Delay& e);
|
||||
Delay& operator=(const Delay& e);
|
||||
size_t default_delay = 5;
|
||||
size_t down_sampling_factor = 4;
|
||||
size_t num_filters = 5;
|
||||
size_t delay_headroom_samples = 32;
|
||||
size_t hysteresis_limit_blocks = 1;
|
||||
size_t fixed_capture_delay_samples = 0;
|
||||
float delay_estimate_smoothing = 0.7f;
|
||||
float delay_estimate_smoothing_delay_found = 0.7f;
|
||||
float delay_candidate_detection_threshold = 0.2f;
|
||||
struct DelaySelectionThresholds {
|
||||
int initial;
|
||||
int converged;
|
||||
} delay_selection_thresholds = {5, 20};
|
||||
bool use_external_delay_estimator = false;
|
||||
bool log_warning_on_delay_changes = false;
|
||||
struct AlignmentMixing {
|
||||
bool downmix;
|
||||
bool adaptive_selection;
|
||||
float activity_power_threshold;
|
||||
bool prefer_first_two_channels;
|
||||
};
|
||||
AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
|
||||
AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
|
||||
bool detect_pre_echo = true;
|
||||
} delay;
|
||||
|
||||
struct Filter {
|
||||
struct RefinedConfiguration {
|
||||
size_t length_blocks;
|
||||
float leakage_converged;
|
||||
float leakage_diverged;
|
||||
float error_floor;
|
||||
float error_ceil;
|
||||
float noise_gate;
|
||||
};
|
||||
|
||||
struct CoarseConfiguration {
|
||||
size_t length_blocks;
|
||||
float rate;
|
||||
float noise_gate;
|
||||
};
|
||||
|
||||
RefinedConfiguration refined = {13, 0.00005f, 0.05f,
|
||||
0.001f, 2.f, 20075344.f};
|
||||
CoarseConfiguration coarse = {13, 0.7f, 20075344.f};
|
||||
|
||||
RefinedConfiguration refined_initial = {12, 0.005f, 0.5f,
|
||||
0.001f, 2.f, 20075344.f};
|
||||
CoarseConfiguration coarse_initial = {12, 0.9f, 20075344.f};
|
||||
|
||||
size_t config_change_duration_blocks = 250;
|
||||
float initial_state_seconds = 2.5f;
|
||||
int coarse_reset_hangover_blocks = 25;
|
||||
bool conservative_initial_phase = false;
|
||||
bool enable_coarse_filter_output_usage = true;
|
||||
bool use_linear_filter = true;
|
||||
bool high_pass_filter_echo_reference = false;
|
||||
bool export_linear_aec_output = false;
|
||||
} filter;
|
||||
|
||||
struct Erle {
|
||||
float min = 1.f;
|
||||
float max_l = 4.f;
|
||||
float max_h = 1.5f;
|
||||
bool onset_detection = true;
|
||||
size_t num_sections = 1;
|
||||
bool clamp_quality_estimate_to_zero = true;
|
||||
bool clamp_quality_estimate_to_one = true;
|
||||
} erle;
|
||||
|
||||
struct EpStrength {
|
||||
float default_gain = 1.f;
|
||||
float default_len = 0.83f;
|
||||
float nearend_len = 0.83f;
|
||||
bool echo_can_saturate = true;
|
||||
bool bounded_erl = false;
|
||||
bool erle_onset_compensation_in_dominant_nearend = false;
|
||||
bool use_conservative_tail_frequency_response = true;
|
||||
} ep_strength;
|
||||
|
||||
struct EchoAudibility {
|
||||
float low_render_limit = 4 * 64.f;
|
||||
float normal_render_limit = 64.f;
|
||||
float floor_power = 2 * 64.f;
|
||||
float audibility_threshold_lf = 10;
|
||||
float audibility_threshold_mf = 10;
|
||||
float audibility_threshold_hf = 10;
|
||||
bool use_stationarity_properties = false;
|
||||
bool use_stationarity_properties_at_init = false;
|
||||
} echo_audibility;
|
||||
|
||||
struct RenderLevels {
|
||||
float active_render_limit = 100.f;
|
||||
float poor_excitation_render_limit = 150.f;
|
||||
float poor_excitation_render_limit_ds8 = 20.f;
|
||||
float render_power_gain_db = 0.f;
|
||||
} render_levels;
|
||||
|
||||
struct EchoRemovalControl {
|
||||
bool has_clock_drift = false;
|
||||
bool linear_and_stable_echo_path = false;
|
||||
} echo_removal_control;
|
||||
|
||||
struct EchoModel {
|
||||
EchoModel();
|
||||
EchoModel(const EchoModel& e);
|
||||
EchoModel& operator=(const EchoModel& e);
|
||||
size_t noise_floor_hold = 50;
|
||||
float min_noise_floor_power = 1638400.f;
|
||||
float stationary_gate_slope = 10.f;
|
||||
float noise_gate_power = 27509.42f;
|
||||
float noise_gate_slope = 0.3f;
|
||||
size_t render_pre_window_size = 1;
|
||||
size_t render_post_window_size = 1;
|
||||
bool model_reverb_in_nonlinear_mode = true;
|
||||
} echo_model;
|
||||
|
||||
struct ComfortNoise {
|
||||
float noise_floor_dbfs = -96.03406f;
|
||||
} comfort_noise;
|
||||
|
||||
// Parameters for the suppression-gain computation — the final stage that
// attenuates residual echo in the frequency domain.
// NOTE(review): AEC3 tuning constants; semantics are defined by the aec3
// suppression-gain code — confirm there before changing defaults.
struct Suppressor {
  Suppressor();
  Suppressor(const Suppressor& e);
  Suppressor& operator=(const Suppressor& e);

  // Number of blocks over which the nearend spectrum is averaged.
  size_t nearend_average_blocks = 4;

  // Echo-to-nearend (enr) and echo-to-masker (emr) ratio thresholds that
  // bound full transparency vs. full suppression.
  struct MaskingThresholds {
    MaskingThresholds(float enr_transparent,
                      float enr_suppress,
                      float emr_transparent);
    MaskingThresholds(const MaskingThresholds& e);
    MaskingThresholds& operator=(const MaskingThresholds& e);
    float enr_transparent;
    float enr_suppress;
    float emr_transparent;
  };

  // Gain-behavior tuning for one operating mode.
  struct Tuning {
    Tuning(MaskingThresholds mask_lf,
           MaskingThresholds mask_hf,
           float max_inc_factor,
           float max_dec_factor_lf);
    Tuning(const Tuning& e);
    Tuning& operator=(const Tuning& e);
    MaskingThresholds mask_lf;  // Thresholds for the low-frequency bands.
    MaskingThresholds mask_hf;  // Thresholds for the high-frequency bands.
    float max_inc_factor;       // Maximum per-block gain increase factor.
    float max_dec_factor_lf;    // Maximum per-block gain decrease (low bands).
  };

  // Tuning used in the normal operating mode.
  Tuning normal_tuning = Tuning(MaskingThresholds(.3f, .4f, .3f),
                                MaskingThresholds(.07f, .1f, .3f),
                                2.0f,
                                0.25f);
  // Tuning used while nearend speech is detected as dominant.
  Tuning nearend_tuning = Tuning(MaskingThresholds(1.09f, 1.1f, .3f),
                                 MaskingThresholds(.1f, .3f, .3f),
                                 2.0f,
                                 0.25f);

  bool lf_smoothing_during_initial_phase = true;
  // Band-index boundaries for low-frequency smoothing and the lf/hf split.
  int last_permanent_lf_smoothing_band = 0;
  int last_lf_smoothing_band = 5;
  int last_lf_band = 5;
  int first_hf_band = 8;

  // Detector for periods where the nearend signal dominates the echo.
  struct DominantNearendDetection {
    float enr_threshold = .25f;
    float enr_exit_threshold = 10.f;
    float snr_threshold = 30.f;
    // Hold/trigger counters, in blocks.
    int hold_duration = 50;
    int trigger_threshold = 12;
    bool use_during_initial_phase = true;
    bool use_unbounded_echo_spectrum = true;
  } dominant_nearend_detection;

  // Alternative nearend detector operating on configurable subband regions.
  struct SubbandNearendDetection {
    size_t nearend_average_blocks = 1;
    struct SubbandRegion {
      size_t low;   // First band index of the region.
      size_t high;  // Last band index of the region.
    };
    SubbandRegion subband1 = {1, 1};
    SubbandRegion subband2 = {1, 1};
    float nearend_threshold = 1.f;
    float snr_threshold = 1.f;
  } subband_nearend_detection;

  // Selects subband_nearend_detection over dominant_nearend_detection.
  bool use_subband_nearend_detection = false;

  // Extra suppression applied to the upper frequency bands.
  struct HighBandsSuppression {
    float enr_threshold = 1.f;
    float max_gain_during_echo = 1.f;
    // Activation threshold and gain for the anti-howling mechanism.
    float anti_howling_activation_threshold = 400.f;
    float anti_howling_gain = 1.f;
  } high_bands_suppression;

  float floor_first_increase = 0.00001f;
  bool conservative_hf_suppression = false;
} suppressor;
|
||||
|
||||
// Behavior for multi-channel (stereo) render/capture processing.
struct MultiChannel {
  // When true, content-based detection decides whether input is truly stereo.
  bool detect_stereo_content = true;
  float stereo_detection_threshold = 0.0f;
  // Timeout before a stereo decision may revert — presumably in seconds and
  // disabled when non-positive; confirm against the aec3 usage.
  int stereo_detection_timeout_threshold_seconds = 300;
  float stereo_detection_hysteresis_seconds = 2.0f;
} multi_channel;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
|
75
VocieProcess/api/audio/echo_control.h
Normal file
75
VocieProcess/api/audio/echo_control.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_AUDIO_ECHO_CONTROL_H_
|
||||
#define API_AUDIO_ECHO_CONTROL_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioBuffer;
|
||||
|
||||
// Interface for an acoustic echo cancellation (AEC) submodule.
class EchoControl {
 public:
  // Analysis (not changing) of the render signal.
  virtual void AnalyzeRender(AudioBuffer* render) = 0;

  // Analysis (not changing) of the capture signal.
  virtual void AnalyzeCapture(AudioBuffer* capture) = 0;

  // Processes the capture signal in order to remove the echo.
  virtual void ProcessCapture(AudioBuffer* capture, bool level_change) = 0;

  // As above, but also returns the linear filter output.
  virtual void ProcessCapture(AudioBuffer* capture,
                              AudioBuffer* linear_output,
                              bool level_change) = 0;

  // Snapshot of the echo controller's performance counters.
  struct Metrics {
    double echo_return_loss;
    double echo_return_loss_enhancement;
    int delay_ms;
  };

  // Collect current metrics from the echo controller.
  virtual Metrics GetMetrics() const = 0;

  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;

  // Specifies whether the capture output will be used. The purpose of this is
  // to allow the echo controller to deactivate some of the processing when the
  // resulting output is anyway not used, for instance when the endpoint is
  // muted.
  // TODO(b/177830919): Make pure virtual.
  virtual void SetCaptureOutputUsage(bool capture_output_used) {}

  // Returns whether the signal is altered.
  virtual bool ActiveProcessing() const = 0;

  virtual ~EchoControl() {}
};
|
||||
|
||||
// Interface for a factory that creates EchoControllers.
class EchoControlFactory {
 public:
  // Creates an EchoControl instance for the given sample rate and render /
  // capture channel counts. The caller owns the returned object.
  virtual std::unique_ptr<EchoControl> Create(int sample_rate_hz,
                                              int num_render_channels,
                                              int num_capture_channels) = 0;

  virtual ~EchoControlFactory() = default;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_AUDIO_ECHO_CONTROL_H_
|
31
VocieProcess/api/location.h
Normal file
31
VocieProcess/api/location.h
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright 2023 The WebRTC Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_LOCATION_H_
|
||||
#define API_LOCATION_H_
|
||||
|
||||
#include "rtc_base/system/rtc_export.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Location provides basic info where of an object was constructed, or was
|
||||
// significantly brought to life. This is a stripped down version of
|
||||
// https://source.chromium.org/chromium/chromium/src/+/main:base/location.h
|
||||
// that only specifies an interface compatible to how base::Location is
|
||||
// supposed to be used.
|
||||
// The declaration is overridden inside the Chromium build.
|
||||
class RTC_EXPORT Location {
 public:
  // Returns a Location describing the current call site. In this stripped-down
  // version no file/line information is captured; the object is empty.
  static Location Current() { return Location(); }
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_LOCATION_H_
|
67
VocieProcess/api/ref_count.h
Normal file
67
VocieProcess/api/ref_count.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright 2011 The WebRTC Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef API_REF_COUNT_H_
|
||||
#define API_REF_COUNT_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Refcounted objects should implement the following informal interface:
|
||||
//
|
||||
// void AddRef() const;
|
||||
// RefCountReleaseStatus Release() const;
|
||||
//
|
||||
// You may access members of a reference-counted object, including the AddRef()
|
||||
// and Release() methods, only if you already own a reference to it, or if
|
||||
// you're borrowing someone else's reference. (A newly created object is a
|
||||
// special case: the reference count is zero on construction, and the code that
|
||||
// creates the object should immediately call AddRef(), bringing the reference
|
||||
// count from zero to one, e.g., by constructing an rtc::scoped_refptr).
|
||||
//
|
||||
// AddRef() creates a new reference to the object.
|
||||
//
|
||||
// Release() releases a reference to the object; the caller now has one less
|
||||
// reference than before the call. Returns kDroppedLastRef if the number of
|
||||
// references dropped to zero because of this (in which case the object destroys
|
||||
// itself). Otherwise, returns kOtherRefsRemained, to signal that at the precise
|
||||
// time the caller's reference was dropped, other references still remained (but
|
||||
// if other threads own references, this may of course have changed by the time
|
||||
// Release() returns).
|
||||
//
|
||||
// The caller of Release() must treat it in the same way as a delete operation:
|
||||
// Regardless of the return value from Release(), the caller mustn't access the
|
||||
// object. The object might still be alive, due to references held by other
|
||||
// users of the object, but the object can go away at any time, e.g., as the
|
||||
// result of another thread calling Release().
|
||||
//
|
||||
// Calling AddRef() and Release() manually is discouraged. It's recommended to
|
||||
// use rtc::scoped_refptr to manage all pointers to reference counted objects.
|
||||
// Note that rtc::scoped_refptr depends on compile-time duck-typing; formally
|
||||
// implementing the below RefCountInterface is not required.
|
||||
|
||||
enum class RefCountReleaseStatus { kDroppedLastRef, kOtherRefsRemained };
|
||||
|
||||
// Interfaces where refcounting is part of the public api should
// inherit this abstract interface. The implementation of these
// methods is usually provided by the RefCountedObject template class,
// applied as a leaf in the inheritance tree.
class RefCountInterface {
 public:
  // Adds one reference. See the file-level comment above for the informal
  // AddRef()/Release() contract.
  virtual void AddRef() const = 0;

  // Removes one reference; returns kDroppedLastRef when this was the last
  // reference (in which case the object has already destroyed itself).
  virtual RefCountReleaseStatus Release() const = 0;

  // Non-public destructor, because Release() has exclusive responsibility for
  // destroying the object.
 protected:
  virtual ~RefCountInterface() {}
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_REF_COUNT_H_
|
227
VocieProcess/api/scoped_refptr.h
Normal file
227
VocieProcess/api/scoped_refptr.h
Normal file
@ -0,0 +1,227 @@
|
||||
/*
|
||||
* Copyright 2011 The WebRTC Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Originally these classes are from Chromium.
|
||||
// http://src.chromium.org/viewvc/chrome/trunk/src/base/memory/ref_counted.h?view=markup
|
||||
|
||||
//
|
||||
// A smart pointer class for reference counted objects. Use this class instead
|
||||
// of calling AddRef and Release manually on a reference counted object to
|
||||
// avoid common memory leaks caused by forgetting to Release an object
|
||||
// reference. Sample usage:
|
||||
//
|
||||
// class MyFoo : public RefCounted<MyFoo> {
|
||||
// ...
|
||||
// };
|
||||
//
|
||||
// void some_function() {
|
||||
// scoped_refptr<MyFoo> foo = new MyFoo();
|
||||
// foo->Method(param);
|
||||
// // `foo` is released when this function returns
|
||||
// }
|
||||
//
|
||||
// void some_other_function() {
|
||||
// scoped_refptr<MyFoo> foo = new MyFoo();
|
||||
// ...
|
||||
// foo = nullptr; // explicitly releases `foo`
|
||||
// ...
|
||||
// if (foo)
|
||||
// foo->Method(param);
|
||||
// }
|
||||
//
|
||||
// The above examples show how scoped_refptr<T> acts like a pointer to T.
|
||||
// Given two scoped_refptr<T> classes, it is also possible to exchange
|
||||
// references between the two objects, like so:
|
||||
//
|
||||
// {
|
||||
// scoped_refptr<MyFoo> a = new MyFoo();
|
||||
// scoped_refptr<MyFoo> b;
|
||||
//
|
||||
// b.swap(a);
|
||||
// // now, `b` references the MyFoo object, and `a` references null.
|
||||
// }
|
||||
//
|
||||
// To make both `a` and `b` in the above example reference the same MyFoo
|
||||
// object, simply use the assignment operator:
|
||||
//
|
||||
// {
|
||||
// scoped_refptr<MyFoo> a = new MyFoo();
|
||||
// scoped_refptr<MyFoo> b;
|
||||
//
|
||||
// b = a;
|
||||
// // now, `a` and `b` each own a reference to the same MyFoo object.
|
||||
// }
|
||||
//
|
||||
|
||||
#ifndef API_SCOPED_REFPTR_H_
|
||||
#define API_SCOPED_REFPTR_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Smart pointer that manages a reference-counted object of type T.
// T must expose AddRef() and Release() (see api/ref_count.h); construction
// and copying take a reference, destruction and reset drop one.
template <class T>
class scoped_refptr {
 public:
  typedef T element_type;

  // Empty (null) pointers: no object held, no reference taken.
  scoped_refptr() : ptr_(nullptr) {}
  scoped_refptr(std::nullptr_t) : ptr_(nullptr) {}  // NOLINT(runtime/explicit)

  // Wraps `p` and takes one reference on it (when non-null).
  explicit scoped_refptr(T* p) : ptr_(p) {
    if (ptr_ != nullptr) {
      ptr_->AddRef();
    }
  }

  // Copy construction shares ownership: one additional reference is taken.
  scoped_refptr(const scoped_refptr<T>& r) : ptr_(r.ptr_) {
    if (ptr_ != nullptr) {
      ptr_->AddRef();
    }
  }

  // Converting copy construction (e.g. from a pointer to a derived type).
  template <typename U>
  scoped_refptr(const scoped_refptr<U>& r) : ptr_(r.get()) {
    if (ptr_ != nullptr) {
      ptr_->AddRef();
    }
  }

  // Move construction steals the reference; `r` is left null.
  scoped_refptr(scoped_refptr<T>&& r) noexcept : ptr_(r.release()) {}

  template <typename U>
  scoped_refptr(scoped_refptr<U>&& r) noexcept : ptr_(r.release()) {}

  // Drops the held reference, if any.
  ~scoped_refptr() {
    if (ptr_ != nullptr) {
      ptr_->Release();
    }
  }

  T* get() const { return ptr_; }
  explicit operator bool() const { return ptr_ != nullptr; }
  T& operator*() const { return *ptr_; }
  T* operator->() const { return ptr_; }

  // Relinquishes the (possibly null) raw pointer without touching the
  // reference count. The caller becomes the owner of exactly one reference
  // and is responsible for one matching Release() call.
  T* release() { return std::exchange(ptr_, nullptr); }

  // Assignment from a raw pointer. AddRef() is done before Release() so that
  // self-assignment (p already held) stays safe.
  scoped_refptr<T>& operator=(T* p) {
    if (p != nullptr) {
      p->AddRef();
    }
    if (ptr_ != nullptr) {
      ptr_->Release();
    }
    ptr_ = p;
    return *this;
  }

  scoped_refptr<T>& operator=(const scoped_refptr<T>& r) {
    return *this = r.get();
  }

  template <typename U>
  scoped_refptr<T>& operator=(const scoped_refptr<U>& r) {
    return *this = r.get();
  }

  scoped_refptr<T>& operator=(scoped_refptr<T>&& r) noexcept {
    scoped_refptr<T>(std::move(r)).swap(*this);
    return *this;
  }

  template <typename U>
  scoped_refptr<T>& operator=(scoped_refptr<U>&& r) noexcept {
    scoped_refptr<T>(std::move(r)).swap(*this);
    return *this;
  }

  // Exchanges the stored pointer with `*pp`; no reference counts change.
  void swap(T** pp) noexcept { std::swap(ptr_, *pp); }

  void swap(scoped_refptr<T>& r) noexcept { swap(&r.ptr_); }

 protected:
  T* ptr_;
};
|
||||
|
||||
// Equality between two scoped_refptrs compares the managed raw pointers.
template <typename T, typename U>
bool operator==(const scoped_refptr<T>& a, const scoped_refptr<U>& b) {
  return a.get() == b.get();
}
template <typename T, typename U>
bool operator!=(const scoped_refptr<T>& a, const scoped_refptr<U>& b) {
  return !(a == b);
}

// Comparisons with nullptr: true iff the scoped_refptr is empty.
template <typename T>
bool operator==(const scoped_refptr<T>& a, std::nullptr_t) {
  return a.get() == nullptr;
}

template <typename T>
bool operator!=(const scoped_refptr<T>& a, std::nullptr_t) {
  return !(a == nullptr);
}

template <typename T>
bool operator==(std::nullptr_t, const scoped_refptr<T>& a) {
  return a.get() == nullptr;
}

template <typename T>
bool operator!=(std::nullptr_t, const scoped_refptr<T>& a) {
  return !(a == nullptr);
}

// Comparison with raw pointer.
template <typename T, typename U>
bool operator==(const scoped_refptr<T>& a, const U* b) {
  return a.get() == b;
}
template <typename T, typename U>
bool operator!=(const scoped_refptr<T>& a, const U* b) {
  return !(a == b);
}

template <typename T, typename U>
bool operator==(const T* a, const scoped_refptr<U>& b) {
  return a == b.get();
}
template <typename T, typename U>
bool operator!=(const T* a, const scoped_refptr<U>& b) {
  return !(a == b);
}

// Ordered comparison, needed for use as a std::map key.
template <typename T, typename U>
bool operator<(const scoped_refptr<T>& a, const scoped_refptr<U>& b) {
  return a.get() < b.get();
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
namespace rtc {
|
||||
// Backwards compatible alias.
|
||||
// TODO(bugs.webrtc.org/15622): Deprecate and remove.
|
||||
template <typename T>
|
||||
using scoped_refptr = webrtc::scoped_refptr<T>;
|
||||
} // namespace rtc
|
||||
|
||||
#endif // API_SCOPED_REFPTR_H_
|
78
VocieProcess/api/task_queue/task_queue_base.cc
Normal file
78
VocieProcess/api/task_queue/task_queue_base.cc
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright 2019 The WebRTC Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "api/task_queue/task_queue_base.h"
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
#include "absl/base/config.h"
|
||||
|
||||
#if defined(ABSL_HAVE_THREAD_LOCAL)

namespace webrtc {
namespace {

// Thread-local pointer to the task queue currently executing on this thread
// (nullptr when the thread is not driven by a TaskQueue).
ABSL_CONST_INIT thread_local TaskQueueBase* current = nullptr;

}  // namespace

TaskQueueBase* TaskQueueBase::Current() {
  return current;
}

// RAII: installs `task_queue` as Current() for this thread and remembers the
// previous value so the destructor can restore it.
TaskQueueBase::CurrentTaskQueueSetter::CurrentTaskQueueSetter(
    TaskQueueBase* task_queue)
    : previous_(current) {
  current = task_queue;
}

TaskQueueBase::CurrentTaskQueueSetter::~CurrentTaskQueueSetter() {
  current = previous_;
}
}  // namespace webrtc

#elif defined(WEBRTC_POSIX)

#include <pthread.h>

namespace webrtc {
namespace {

// Fallback for toolchains without thread_local support: the current queue is
// stored in a process-wide pthread TLS slot.
ABSL_CONST_INIT pthread_key_t g_queue_ptr_tls = 0;

void InitializeTls() {
  // NOTE(review): RTC_CHECK is used in this branch but "rtc_base/checks.h" is
  // not included here — confirm it is pulled in transitively.
  RTC_CHECK(pthread_key_create(&g_queue_ptr_tls, nullptr) == 0);
}

pthread_key_t GetQueuePtrTls() {
  // pthread_once guarantees the key is created exactly once per process.
  static pthread_once_t init_once = PTHREAD_ONCE_INIT;
  RTC_CHECK(pthread_once(&init_once, &InitializeTls) == 0);
  return g_queue_ptr_tls;
}

}  // namespace

TaskQueueBase* TaskQueueBase::Current() {
  return static_cast<TaskQueueBase*>(pthread_getspecific(GetQueuePtrTls()));
}

// Same RAII contract as the thread_local variant above, via pthread TLS.
TaskQueueBase::CurrentTaskQueueSetter::CurrentTaskQueueSetter(
    TaskQueueBase* task_queue)
    : previous_(TaskQueueBase::Current()) {
  pthread_setspecific(GetQueuePtrTls(), task_queue);
}

TaskQueueBase::CurrentTaskQueueSetter::~CurrentTaskQueueSetter() {
  pthread_setspecific(GetQueuePtrTls(), previous_);
}

}  // namespace webrtc

#else
#error Unsupported platform
#endif
|
197
VocieProcess/api/task_queue/task_queue_base.h
Normal file
197
VocieProcess/api/task_queue/task_queue_base.h
Normal file
@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright 2019 The WebRTC Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef API_TASK_QUEUE_TASK_QUEUE_BASE_H_
|
||||
#define API_TASK_QUEUE_TASK_QUEUE_BASE_H_
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "absl/functional/any_invocable.h"
|
||||
#include "api/location.h"
|
||||
#include "api/units/time_delta.h"
|
||||
#include "rtc_base/system/rtc_export.h"
|
||||
#include "rtc_base/thread_annotations.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Asynchronously executes tasks in a way that guarantees that they're executed
|
||||
// in FIFO order and that tasks never overlap. Tasks may always execute on the
|
||||
// same worker thread and they may not. To DCHECK that tasks are executing on a
|
||||
// known task queue, use IsCurrent().
|
||||
class RTC_LOCKABLE RTC_EXPORT TaskQueueBase {
 public:
  // Selects how much scheduling leeway a delayed task tolerates.
  enum class DelayPrecision {
    // This may include up to a 17 ms leeway in addition to OS timer precision.
    // See PostDelayedTask() for more information.
    kLow,
    // This does not have the additional delay that kLow has, but it is still
    // limited by OS timer precision. See PostDelayedHighPrecisionTask() for
    // more information.
    kHigh,
  };

  // Starts destruction of the task queue.
  // On return ensures no task are running and no new tasks are able to start
  // on the task queue.
  // Responsible for deallocation. Deallocation may happen synchronously during
  // Delete or asynchronously after Delete returns.
  // Code not running on the TaskQueue should not make any assumption when
  // TaskQueue is deallocated and thus should not call any methods after Delete.
  // Code running on the TaskQueue should not call Delete, but can assume
  // TaskQueue still exists and may call other methods, e.g. PostTask.
  // Should be called on the same task queue or thread that this task queue
  // was created on.
  virtual void Delete() = 0;

  // Schedules a `task` to execute. Tasks are executed in FIFO order.
  // When a TaskQueue is deleted, pending tasks will not be executed but they
  // will be deleted.
  //
  // As long as tasks are not posted from task destruction, posted tasks are
  // guaranteed to be destroyed with Current() pointing to the task queue they
  // were posted to, whether they're executed or not. That means SequenceChecker
  // works during task destruction, a fact that can be used to guarantee
  // thread-compatible object deletion happening on a particular task queue
  // which can simplify class design.
  // Note that this guarantee does not apply to delayed tasks.
  //
  // May be called on any thread or task queue, including this task queue.
  void PostTask(absl::AnyInvocable<void() &&> task,
                const Location& location = Location::Current()) {
    PostTaskImpl(std::move(task), PostTaskTraits{}, location);
  }

  // Prefer PostDelayedTask() over PostDelayedHighPrecisionTask() whenever
  // possible.
  //
  // Schedules a `task` to execute a specified `delay` from when the call is
  // made, using "low" precision. All scheduling is affected by OS-specific
  // leeway and current workloads which means that in terms of precision there
  // are no hard guarantees, but in addition to the OS induced leeway, "low"
  // precision adds up to a 17 ms additional leeway. The purpose of this leeway
  // is to achieve more efficient CPU scheduling and reduce Idle Wake Up
  // frequency.
  //
  // The task may execute with [-1, 17 + OS induced leeway) ms additional delay.
  //
  // Avoid making assumptions about the precision of the OS scheduler. On macOS,
  // the OS induced leeway may be 10% of sleep interval. On Windows, 1 ms
  // precision timers may be used but there are cases, such as when running on
  // battery, when the timer precision can be as poor as 15 ms.
  //
  // "Low" precision is not implemented everywhere yet. Where not yet
  // implemented, PostDelayedTask() has "high" precision. See
  // https://crbug.com/webrtc/13583 for more information.
  //
  // May be called on any thread or task queue, including this task queue.
  void PostDelayedTask(absl::AnyInvocable<void() &&> task,
                       TimeDelta delay,
                       const Location& location = Location::Current()) {
    PostDelayedTaskImpl(std::move(task), delay, PostDelayedTaskTraits{},
                        location);
  }

  // Prefer PostDelayedTask() over PostDelayedHighPrecisionTask() whenever
  // possible.
  //
  // Schedules a `task` to execute a specified `delay` from when the call is
  // made, using "high" precision. All scheduling is affected by OS-specific
  // leeway and current workloads which means that in terms of precision there
  // are no hard guarantees.
  //
  // The task may execute with [-1, OS induced leeway] ms additional delay.
  //
  // Avoid making assumptions about the precision of the OS scheduler. On macOS,
  // the OS induced leeway may be 10% of sleep interval. On Windows, 1 ms
  // precision timers may be used but there are cases, such as when running on
  // battery, when the timer precision can be as poor as 15 ms.
  //
  // May be called on any thread or task queue, including this task queue.
  void PostDelayedHighPrecisionTask(
      absl::AnyInvocable<void() &&> task,
      TimeDelta delay,
      const Location& location = Location::Current()) {
    PostDelayedTaskTraits traits;
    traits.high_precision = true;
    PostDelayedTaskImpl(std::move(task), delay, traits, location);
  }

  // As specified by `precision`, calls either PostDelayedTask() or
  // PostDelayedHighPrecisionTask().
  void PostDelayedTaskWithPrecision(
      DelayPrecision precision,
      absl::AnyInvocable<void() &&> task,
      TimeDelta delay,
      const Location& location = Location::Current()) {
    switch (precision) {
      case DelayPrecision::kLow:
        PostDelayedTask(std::move(task), delay, location);
        break;
      case DelayPrecision::kHigh:
        PostDelayedHighPrecisionTask(std::move(task), delay, location);
        break;
    }
  }

  // Returns the task queue that is running the current thread.
  // Returns nullptr if this thread is not associated with any task queue.
  // May be called on any thread or task queue, including this task queue.
  static TaskQueueBase* Current();
  // True iff the calling code is running on this task queue.
  bool IsCurrent() const { return Current() == this; }

 protected:
  // This is currently only present here to simplify introduction of future
  // planned task queue changes.
  struct PostTaskTraits {};

  struct PostDelayedTaskTraits {
    // If `high_precision` is false, tasks may execute within up to a 17 ms
    // leeway in addition to OS timer precision. Otherwise the task should be
    // limited to OS timer precision. See PostDelayedTask() and
    // PostDelayedHighPrecisionTask() for more information.
    bool high_precision = false;
  };

  // RAII helper that installs a task queue as Current() for the lifetime of
  // the object, restoring the previous value on destruction.
  class RTC_EXPORT CurrentTaskQueueSetter {
   public:
    explicit CurrentTaskQueueSetter(TaskQueueBase* task_queue);
    CurrentTaskQueueSetter(const CurrentTaskQueueSetter&) = delete;
    CurrentTaskQueueSetter& operator=(const CurrentTaskQueueSetter&) = delete;
    ~CurrentTaskQueueSetter();

   private:
    TaskQueueBase* const previous_;
  };

  // Subclasses should implement this method to support the behavior defined in
  // the PostTask and PostTaskTraits docs above.
  virtual void PostTaskImpl(absl::AnyInvocable<void() &&> task,
                            const PostTaskTraits& traits,
                            const Location& location) = 0;

  // Subclasses should implement this method to support the behavior defined in
  // the PostDelayedTask/PostHighPrecisionDelayedTask and PostDelayedTaskTraits
  // docs above.
  virtual void PostDelayedTaskImpl(absl::AnyInvocable<void() &&> task,
                                   TimeDelta delay,
                                   const PostDelayedTaskTraits& traits,
                                   const Location& location) = 0;

  // Users of the TaskQueue should call Delete instead of directly deleting
  // this object.
  virtual ~TaskQueueBase() = default;
};
|
||||
|
||||
// Deleter for use with std::unique_ptr<TaskQueueBase, TaskQueueDeleter>:
// routes destruction through TaskQueueBase::Delete() instead of `delete`.
struct TaskQueueDeleter {
  void operator()(TaskQueueBase* task_queue) const { task_queue->Delete(); }
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_TASK_QUEUE_TASK_QUEUE_BASE_H_
|
38
VocieProcess/api/units/time_delta.cc
Normal file
38
VocieProcess/api/units/time_delta.cc
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/units/time_delta.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "rtc_base/strings/string_builder.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Renders `value` as human-readable text, choosing the largest unit
// (s / ms / us) that represents it exactly; infinities print as "+inf ms" /
// "-inf ms".
std::string ToString(TimeDelta value) {
  char buf[64];
  rtc::SimpleStringBuilder sb(buf);
  if (value.IsPlusInfinity()) {
    sb << "+inf ms";
  } else if (value.IsMinusInfinity()) {
    sb << "-inf ms";
  } else {
    const int64_t us = value.us();
    if (us == 0 || us % 1000 != 0) {
      sb << us << " us";
    } else if (value.ms() % 1000 != 0) {
      sb << value.ms() << " ms";
    } else {
      sb << value.seconds() << " s";
    }
  }
  return sb.str();
}
|
||||
|
||||
} // namespace webrtc
|
99
VocieProcess/api/units/time_delta.h
Normal file
99
VocieProcess/api/units/time_delta.h
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_UNITS_TIME_DELTA_H_
|
||||
#define API_UNITS_TIME_DELTA_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "rtc_base/units/unit_base.h" // IWYU pragma: export
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// TimeDelta represents the difference between two timestamps. Commonly this can
// be a duration. However since two Timestamps are not guaranteed to have the
// same epoch (they might come from different computers, making exact
// synchronisation infeasible), the duration covered by a TimeDelta can be
// undefined. To simplify usage, it can be constructed and converted to
// different units, specifically seconds (s), milliseconds (ms) and
// microseconds (us).
class TimeDelta final : public rtc_units_impl::RelativeUnit<TimeDelta> {
 public:
  // Named constructors. FromFraction()/FromValue() come from the unit base
  // class; the base resolution is microseconds.
  template <typename T>
  static constexpr TimeDelta Minutes(T value) {
    static_assert(std::is_arithmetic<T>::value, "");
    return Seconds(value * 60);  // Minutes are expressed via Seconds().
  }
  template <typename T>
  static constexpr TimeDelta Seconds(T value) {
    static_assert(std::is_arithmetic<T>::value, "");
    return FromFraction(1'000'000, value);
  }
  template <typename T>
  static constexpr TimeDelta Millis(T value) {
    static_assert(std::is_arithmetic<T>::value, "");
    return FromFraction(1'000, value);
  }
  template <typename T>
  static constexpr TimeDelta Micros(T value) {
    static_assert(std::is_arithmetic<T>::value, "");
    return FromValue(value);
  }

  // No default value; construct through the named constructors above.
  TimeDelta() = delete;

  // Unit accessors. Exact conversion/rounding semantics are defined by the
  // unit base class (rtc_base/units/unit_base.h).
  template <typename T = int64_t>
  constexpr T seconds() const {
    return ToFraction<1000000, T>();
  }
  template <typename T = int64_t>
  constexpr T ms() const {
    return ToFraction<1000, T>();
  }
  template <typename T = int64_t>
  constexpr T us() const {
    return ToValue<T>();
  }
  template <typename T = int64_t>
  constexpr T ns() const {
    // Nanoseconds are a x1000 multiple of the stored microsecond value.
    return ToMultiple<1000, T>();
  }

  // `_or` accessors: presumably return `fallback_value` when the delta has
  // no finite value — TODO confirm against unit_base.h.
  constexpr int64_t seconds_or(int64_t fallback_value) const {
    return ToFractionOr<1000000>(fallback_value);
  }
  constexpr int64_t ms_or(int64_t fallback_value) const {
    return ToFractionOr<1000>(fallback_value);
  }
  constexpr int64_t us_or(int64_t fallback_value) const {
    return ToValueOr(fallback_value);
  }

  // Absolute value: negative deltas are flipped to positive.
  constexpr TimeDelta Abs() const {
    return us() < 0 ? TimeDelta::Micros(-us()) : *this;
  }

 private:
  friend class rtc_units_impl::UnitBase<TimeDelta>;
  using RelativeUnit::RelativeUnit;
  // Deltas may be negative ("two sided"), unlike Timestamp.
  static constexpr bool one_sided = false;
};
|
||||
|
||||
std::string ToString(TimeDelta value);
|
||||
// Logging helper; formats identically to ToString().
inline std::string ToLogString(TimeDelta value) {
  return ToString(value);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_UNITS_TIME_DELTA_H_
|
36
VocieProcess/api/units/timestamp.cc
Normal file
36
VocieProcess/api/units/timestamp.cc
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/units/timestamp.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "rtc_base/strings/string_builder.h"
|
||||
|
||||
namespace webrtc {
|
||||
// Renders a Timestamp using the most compact unit that represents it
// exactly: "us", "ms", or whole "s". Infinities print as "+inf ms" /
// "-inf ms".
std::string ToString(Timestamp value) {
  char storage[64];
  rtc::SimpleStringBuilder builder(storage);
  if (value.IsPlusInfinity()) {
    builder << "+inf ms";
  } else if (value.IsMinusInfinity()) {
    builder << "-inf ms";
  } else if (value.us() == 0 || (value.us() % 1000) != 0) {
    // Not a whole number of milliseconds (or exactly zero): microseconds.
    builder << value.us() << " us";
  } else if (value.ms() % 1000 != 0) {
    // Whole milliseconds but not whole seconds.
    builder << value.ms() << " ms";
  } else {
    builder << value.seconds() << " s";
  }
  return builder.str();
}
|
||||
} // namespace webrtc
|
128
VocieProcess/api/units/timestamp.h
Normal file
128
VocieProcess/api/units/timestamp.h
Normal file
@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_UNITS_TIMESTAMP_H_
|
||||
#define API_UNITS_TIMESTAMP_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "api/units/time_delta.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/units/unit_base.h" // IWYU pragma: export
|
||||
|
||||
namespace webrtc {
|
||||
// Timestamp represents the time that has passed since some unspecified epoch.
// The epoch is assumed to be before any represented timestamps, this means that
// negative values are not valid. The most notable feature is that the
// difference of two Timestamps results in a TimeDelta.
class Timestamp final : public rtc_units_impl::UnitBase<Timestamp> {
 public:
  // Named constructors; the base resolution is microseconds.
  template <typename T>
  static constexpr Timestamp Seconds(T value) {
    static_assert(std::is_arithmetic<T>::value, "");
    return FromFraction(1'000'000, value);
  }
  template <typename T>
  static constexpr Timestamp Millis(T value) {
    static_assert(std::is_arithmetic<T>::value, "");
    return FromFraction(1'000, value);
  }
  template <typename T>
  static constexpr Timestamp Micros(T value) {
    static_assert(std::is_arithmetic<T>::value, "");
    return FromValue(value);
  }

  // No default value; construct through the named constructors above.
  Timestamp() = delete;

  // Unit accessors (conversion semantics defined in unit_base.h).
  template <typename T = int64_t>
  constexpr T seconds() const {
    return ToFraction<1000000, T>();
  }
  template <typename T = int64_t>
  constexpr T ms() const {
    return ToFraction<1000, T>();
  }
  template <typename T = int64_t>
  constexpr T us() const {
    return ToValue<T>();
  }

  // `_or` accessors: presumably return `fallback_value` when the timestamp
  // has no finite value — TODO confirm against unit_base.h.
  constexpr int64_t seconds_or(int64_t fallback_value) const {
    return ToFractionOr<1000000>(fallback_value);
  }
  constexpr int64_t ms_or(int64_t fallback_value) const {
    return ToFractionOr<1000>(fallback_value);
  }
  constexpr int64_t us_or(int64_t fallback_value) const {
    return ToValueOr(fallback_value);
  }

  // Arithmetic with TimeDelta saturates at infinities: if either operand is
  // infinite the result is the matching infinity. Combining opposite
  // infinities is a programming error (DCHECKed, result still infinite).
  constexpr Timestamp operator+(const TimeDelta delta) const {
    if (IsPlusInfinity() || delta.IsPlusInfinity()) {
      RTC_DCHECK(!IsMinusInfinity());
      RTC_DCHECK(!delta.IsMinusInfinity());
      return PlusInfinity();
    } else if (IsMinusInfinity() || delta.IsMinusInfinity()) {
      RTC_DCHECK(!IsPlusInfinity());
      RTC_DCHECK(!delta.IsPlusInfinity());
      return MinusInfinity();
    }
    return Timestamp::Micros(us() + delta.us());
  }
  constexpr Timestamp operator-(const TimeDelta delta) const {
    // Note the sign flip: subtracting a minus-infinite delta yields
    // plus infinity, and vice versa.
    if (IsPlusInfinity() || delta.IsMinusInfinity()) {
      RTC_DCHECK(!IsMinusInfinity());
      RTC_DCHECK(!delta.IsPlusInfinity());
      return PlusInfinity();
    } else if (IsMinusInfinity() || delta.IsPlusInfinity()) {
      RTC_DCHECK(!IsPlusInfinity());
      RTC_DCHECK(!delta.IsMinusInfinity());
      return MinusInfinity();
    }
    return Timestamp::Micros(us() - delta.us());
  }
  // The difference of two Timestamps is a TimeDelta (possibly infinite).
  constexpr TimeDelta operator-(const Timestamp other) const {
    if (IsPlusInfinity() || other.IsMinusInfinity()) {
      RTC_DCHECK(!IsMinusInfinity());
      RTC_DCHECK(!other.IsPlusInfinity());
      return TimeDelta::PlusInfinity();
    } else if (IsMinusInfinity() || other.IsPlusInfinity()) {
      RTC_DCHECK(!IsPlusInfinity());
      RTC_DCHECK(!other.IsMinusInfinity());
      return TimeDelta::MinusInfinity();
    }
    return TimeDelta::Micros(us() - other.us());
  }
  constexpr Timestamp& operator-=(const TimeDelta delta) {
    *this = *this - delta;
    return *this;
  }
  constexpr Timestamp& operator+=(const TimeDelta delta) {
    *this = *this + delta;
    return *this;
  }

 private:
  friend class rtc_units_impl::UnitBase<Timestamp>;
  using UnitBase::UnitBase;
  // Timestamps are non-negative ("one sided"); see unit_base.h.
  static constexpr bool one_sided = true;
};
|
||||
|
||||
std::string ToString(Timestamp value);
|
||||
// Logging helper; formats identically to ToString().
inline std::string ToLogString(Timestamp value) {
  return ToString(value);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_UNITS_TIMESTAMP_H_
|
80
VocieProcess/common_audio/channel_buffer.cc
Normal file
80
VocieProcess/common_audio/channel_buffer.cc
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "common_audio/channel_buffer.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Both buffers start out zero-initialized and therefore in sync, so both
// validity flags begin as true.
IFChannelBuffer::IFChannelBuffer(size_t num_frames,
                                 size_t num_channels,
                                 size_t num_bands)
    : ivalid_(true),
      ibuf_(num_frames, num_channels, num_bands),
      fvalid_(true),
      fbuf_(num_frames, num_channels, num_bands) {}

IFChannelBuffer::~IFChannelBuffer() = default;
|
||||
|
||||
// Mutable access to the int16 buffer: bring it up to date first, then mark
// the float mirror stale since the caller may write through the pointer.
ChannelBuffer<int16_t>* IFChannelBuffer::ibuf() {
  RefreshI();
  fvalid_ = false;
  return &ibuf_;
}

// Mutable access to the float buffer; marks the int16 mirror stale.
ChannelBuffer<float>* IFChannelBuffer::fbuf() {
  RefreshF();
  ivalid_ = false;
  return &fbuf_;
}
|
||||
|
||||
// Read-only access: refresh if stale, but keep both buffers valid so the
// sync is preserved.
const ChannelBuffer<int16_t>* IFChannelBuffer::ibuf_const() const {
  RefreshI();
  return &ibuf_;
}

const ChannelBuffer<float>* IFChannelBuffer::fbuf_const() const {
  RefreshF();
  return &fbuf_;
}
|
||||
|
||||
// Rebuilds fbuf_ from ibuf_ if the float copy is stale. The int16 -> float
// conversion is a plain per-sample widening copy (FloatS16 range).
void IFChannelBuffer::RefreshF() const {
  if (!fvalid_) {
    RTC_DCHECK(ivalid_);  // At least one side must always be valid.
    fbuf_.set_num_channels(ibuf_.num_channels());
    const int16_t* const* int_channels = ibuf_.channels();
    float* const* float_channels = fbuf_.channels();
    for (size_t i = 0; i < ibuf_.num_channels(); ++i) {
      for (size_t j = 0; j < ibuf_.num_frames(); ++j) {
        float_channels[i][j] = int_channels[i][j];
      }
    }
    fvalid_ = true;
  }
}
|
||||
|
||||
// Rebuilds ibuf_ from fbuf_ if the int16 copy is stale. FloatS16ToS16()
// clamps and rounds each sample back into int16 range.
void IFChannelBuffer::RefreshI() const {
  if (!ivalid_) {
    RTC_DCHECK(fvalid_);  // At least one side must always be valid.
    int16_t* const* int_channels = ibuf_.channels();
    ibuf_.set_num_channels(fbuf_.num_channels());
    const float* const* float_channels = fbuf_.channels();
    for (size_t i = 0; i < fbuf_.num_channels(); ++i) {
      FloatS16ToS16(float_channels[i], ibuf_.num_frames(), int_channels[i]);
    }
    ivalid_ = true;
  }
}
|
||||
|
||||
} // namespace webrtc
|
253
VocieProcess/common_audio/channel_buffer.h
Normal file
253
VocieProcess/common_audio/channel_buffer.h
Normal file
@ -0,0 +1,253 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef COMMON_AUDIO_CHANNEL_BUFFER_H_
|
||||
#define COMMON_AUDIO_CHANNEL_BUFFER_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/gtest_prod_util.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// TODO: b/335805780 - Remove this method. Instead, use Deinterleave() from
// audio_util.h which requires size checked buffer views.
//
// Splits `interleaved` (frame-major: ch0,ch1,...,ch0,ch1,...) into the
// per-channel buffers pointed to by `deinterleaved`.
template <typename T>
void Deinterleave(const T* interleaved,
                  size_t samples_per_channel,
                  size_t num_channels,
                  T* const* deinterleaved) {
  // Channel `ch` occupies interleaved positions ch, ch + num_channels, ...
  for (size_t ch = 0; ch < num_channels; ++ch) {
    T* dst = deinterleaved[ch];
    for (size_t frame = 0; frame < samples_per_channel; ++frame) {
      dst[frame] = interleaved[frame * num_channels + ch];
    }
  }
}
|
||||
|
||||
// `Interleave()` variant for cases where the deinterleaved channels aren't
|
||||
// represented by a `DeinterleavedView`.
|
||||
// TODO: b/335805780 - Remove this method. Instead, use Deinterleave() from
|
||||
// audio_util.h which requires size checked buffer views.
|
||||
template <typename T>
|
||||
void Interleave(const T* const* deinterleaved,
|
||||
size_t samples_per_channel,
|
||||
size_t num_channels,
|
||||
InterleavedView<T>& interleaved) {
|
||||
RTC_DCHECK_EQ(NumChannels(interleaved), num_channels);
|
||||
RTC_DCHECK_EQ(SamplesPerChannel(interleaved), samples_per_channel);
|
||||
for (size_t i = 0; i < num_channels; ++i) {
|
||||
const T* channel = deinterleaved[i];
|
||||
size_t interleaved_idx = i;
|
||||
for (size_t j = 0; j < samples_per_channel; ++j) {
|
||||
interleaved[interleaved_idx] = channel[j];
|
||||
interleaved_idx += num_channels;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper to encapsulate a contiguous data buffer, full or split into frequency
// bands, with access to a pointer arrays of the deinterleaved channels and
// bands. The buffer is zero initialized at creation.
//
// The buffer structure is showed below for a 2 channel and 2 bands case:
//
// `data_`:
// { [ --- b1ch1 --- ] [ --- b2ch1 --- ] [ --- b1ch2 --- ] [ --- b2ch2 --- ] }
//
// The pointer arrays for the same example are as follows:
//
// `channels_`:
// { [ b1ch1* ] [ b1ch2* ] [ b2ch1* ] [ b2ch2* ] }
//
// `bands_`:
// { [ b1ch1* ] [ b2ch1* ] [ b1ch2* ] [ b2ch2* ] }
template <typename T>
class ChannelBuffer {
 public:
  ChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1)
      : data_(new T[num_frames * num_channels]()),  // () zero-initializes.
        channels_(new T*[num_channels * num_bands]),
        bands_(new T*[num_channels * num_bands]),
        num_frames_(num_frames),
        num_frames_per_band_(num_frames / num_bands),
        num_allocated_channels_(num_channels),
        num_channels_(num_channels),
        num_bands_(num_bands),
        bands_view_(num_allocated_channels_,
                    std::vector<rtc::ArrayView<T>>(num_bands_)),
        channels_view_(
            num_bands_,
            std::vector<rtc::ArrayView<T>>(num_allocated_channels_)) {
    // Temporarily cast away const_ness to allow populating the array views.
    auto* bands_view =
        const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(&bands_view_);
    auto* channels_view =
        const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(
            &channels_view_);

    // Build both indexings (band-major `channels_*` and channel-major
    // `bands_*`) over the single `data_` allocation.
    for (size_t ch = 0; ch < num_allocated_channels_; ++ch) {
      for (size_t band = 0; band < num_bands_; ++band) {
        (*channels_view)[band][ch] = rtc::ArrayView<T>(
            &data_[ch * num_frames_ + band * num_frames_per_band_],
            num_frames_per_band_);
        (*bands_view)[ch][band] = channels_view_[band][ch];
        channels_[band * num_allocated_channels_ + ch] =
            channels_view_[band][ch].data();
        bands_[ch * num_bands_ + band] =
            channels_[band * num_allocated_channels_ + ch];
      }
    }
  }

  // Returns a pointer array to the channels.
  // If band is explicitly specificed, the channels for a specific band are
  // returned and the usage becomes: channels(band)[channel][sample].
  // Where:
  // 0 <= band < `num_bands_`
  // 0 <= channel < `num_allocated_channels_`
  // 0 <= sample < `num_frames_per_band_`

  // If band is not explicitly specified, the full-band channels (or lower band
  // channels) are returned and the usage becomes: channels()[channel][sample].
  // Where:
  // 0 <= channel < `num_allocated_channels_`
  // 0 <= sample < `num_frames_`
  const T* const* channels(size_t band = 0) const {
    RTC_DCHECK_LT(band, num_bands_);
    return &channels_[band * num_allocated_channels_];
  }
  T* const* channels(size_t band = 0) {
    // Delegate to the const overload; cast away constness of the result.
    const ChannelBuffer<T>* t = this;
    return const_cast<T* const*>(t->channels(band));
  }
  rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) {
    return channels_view_[band];
  }
  rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) const {
    return channels_view_[band];
  }

  // Returns a pointer array to the bands for a specific channel.
  // Usage:
  // bands(channel)[band][sample].
  // Where:
  // 0 <= channel < `num_channels_`
  // 0 <= band < `num_bands_`
  // 0 <= sample < `num_frames_per_band_`
  const T* const* bands(size_t channel) const {
    RTC_DCHECK_LT(channel, num_channels_);
    // NOTE(review): `channel` is unsigned, so this GE check cannot fail.
    RTC_DCHECK_GE(channel, 0);
    return &bands_[channel * num_bands_];
  }
  T* const* bands(size_t channel) {
    const ChannelBuffer<T>* t = this;
    return const_cast<T* const*>(t->bands(channel));
  }

  rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) {
    return bands_view_[channel];
  }
  rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) const {
    return bands_view_[channel];
  }

  // Sets the `slice` pointers to the `start_frame` position for each channel.
  // Returns `slice` for convenience.
  const T* const* Slice(T** slice, size_t start_frame) const {
    RTC_DCHECK_LT(start_frame, num_frames_);
    for (size_t i = 0; i < num_channels_; ++i)
      slice[i] = &channels_[i][start_frame];
    return slice;
  }
  T** Slice(T** slice, size_t start_frame) {
    const ChannelBuffer<T>* t = this;
    return const_cast<T**>(t->Slice(slice, start_frame));
  }

  size_t num_frames() const { return num_frames_; }
  size_t num_frames_per_band() const { return num_frames_per_band_; }
  size_t num_channels() const { return num_channels_; }
  size_t num_bands() const { return num_bands_; }
  size_t size() const { return num_frames_ * num_allocated_channels_; }

  // Shrinks (or restores) the channel count visible to users; the backing
  // allocation always holds `num_allocated_channels_`.
  void set_num_channels(size_t num_channels) {
    RTC_DCHECK_LE(num_channels, num_allocated_channels_);
    num_channels_ = num_channels;
  }

  // Overwrites the entire backing buffer; `size` must match size().
  void SetDataForTesting(const T* data, size_t size) {
    RTC_CHECK_EQ(size, this->size());
    memcpy(data_.get(), data, size * sizeof(*data));
  }

 private:
  std::unique_ptr<T[]> data_;
  std::unique_ptr<T*[]> channels_;
  std::unique_ptr<T*[]> bands_;
  const size_t num_frames_;
  const size_t num_frames_per_band_;
  // Number of channels the internal buffer holds.
  const size_t num_allocated_channels_;
  // Number of channels the user sees.
  size_t num_channels_;
  const size_t num_bands_;
  const std::vector<std::vector<rtc::ArrayView<T>>> bands_view_;
  const std::vector<std::vector<rtc::ArrayView<T>>> channels_view_;
};
|
||||
|
||||
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
class IFChannelBuffer {
 public:
  IFChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1);
  ~IFChannelBuffer();

  // Mutable accessors: refresh the requested buffer and invalidate the other.
  ChannelBuffer<int16_t>* ibuf();
  ChannelBuffer<float>* fbuf();
  // Read-only accessors: refresh if stale, both buffers stay valid.
  const ChannelBuffer<int16_t>* ibuf_const() const;
  const ChannelBuffer<float>* fbuf_const() const;

  size_t num_frames() const { return ibuf_.num_frames(); }
  size_t num_frames_per_band() const { return ibuf_.num_frames_per_band(); }
  size_t num_channels() const {
    // Report the channel count of whichever buffer is currently valid.
    return ivalid_ ? ibuf_.num_channels() : fbuf_.num_channels();
  }
  void set_num_channels(size_t num_channels) {
    ibuf_.set_num_channels(num_channels);
    fbuf_.set_num_channels(num_channels);
  }
  size_t num_bands() const { return ibuf_.num_bands(); }

 private:
  // Copy the valid buffer into the stale one (no-ops when already valid).
  void RefreshF() const;
  void RefreshI() const;

  // `mutable` so the const read-only accessors can lazily re-sync.
  mutable bool ivalid_;
  mutable ChannelBuffer<int16_t> ibuf_;
  mutable bool fvalid_;
  mutable ChannelBuffer<float> fbuf_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // COMMON_AUDIO_CHANNEL_BUFFER_H_
|
204
VocieProcess/common_audio/include/audio_util.h
Normal file
204
VocieProcess/common_audio/include/audio_util.h
Normal file
@ -0,0 +1,204 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
|
||||
#define COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
typedef std::numeric_limits<int16_t> limits_int16;
|
||||
|
||||
// TODO(tommi, peah): Move these constants to their own header, e.g.
|
||||
// `audio_constants.h`. Also consider if they should be in api/.
|
||||
|
||||
// Absolute highest acceptable sample rate supported for audio processing,
|
||||
// capture and codecs. Note that for some components some cases a lower limit
|
||||
// applies which typically is 48000 but in some cases is lower.
|
||||
constexpr int kMaxSampleRateHz = 384000;
|
||||
|
||||
// Number of samples per channel for 10ms of audio at the highest sample rate.
|
||||
constexpr size_t kMaxSamplesPerChannel10ms = kMaxSampleRateHz / 100u;
|
||||
|
||||
// The conversion functions use the following naming convention:
|
||||
// S16: int16_t [-32768, 32767]
|
||||
// Float: float [-1.0, 1.0]
|
||||
// FloatS16: float [-32768.0, 32768.0]
|
||||
// Dbfs: float [-20.0*log(10, 32768), 0] = [-90.3, 0]
|
||||
// The ratio conversion functions use this naming convention:
|
||||
// Ratio: float (0, +inf)
|
||||
// Db: float (-inf, +inf)
|
||||
// S16 [-32768, 32767] -> Float [-1.0, 1.0).
static inline float S16ToFloat(int16_t v) {
  constexpr float kInverseScale = 1.f / 32768.f;
  return v * kInverseScale;
}

// FloatS16 [-32768.0, 32768.0] -> S16 [-32768, 32767]: clamp, then round
// half away from zero.
static inline int16_t FloatS16ToS16(float v) {
  const float clamped = std::clamp(v, -32768.f, 32767.f);
  return static_cast<int16_t>(clamped + std::copysign(0.5f, clamped));
}

// Float [-1.0, 1.0] -> S16 [-32768, 32767]: scale, clamp, then round half
// away from zero.
static inline int16_t FloatToS16(float v) {
  const float scaled = std::clamp(v * 32768.f, -32768.f, 32767.f);
  return static_cast<int16_t>(scaled + std::copysign(0.5f, scaled));
}

// Float [-1.0, 1.0] -> FloatS16 [-32768.0, 32768.0]: clamp, then scale.
static inline float FloatToFloatS16(float v) {
  return std::clamp(v, -1.f, 1.f) * 32768.f;
}

// FloatS16 [-32768.0, 32768.0] -> Float [-1.0, 1.0]: clamp, then scale down.
static inline float FloatS16ToFloat(float v) {
  constexpr float kInverseScale = 1.f / 32768.f;
  return std::clamp(v, -32768.f, 32768.f) * kInverseScale;
}
|
||||
|
||||
void FloatToS16(const float* src, size_t size, int16_t* dest);
|
||||
void S16ToFloat(const int16_t* src, size_t size, float* dest);
|
||||
void S16ToFloatS16(const int16_t* src, size_t size, float* dest);
|
||||
void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
|
||||
void FloatToFloatS16(const float* src, size_t size, float* dest);
|
||||
void FloatS16ToFloat(const float* src, size_t size, float* dest);
|
||||
|
||||
// Db (-inf, +inf) -> Ratio (0, +inf): ratio = 10^(db / 20).
inline float DbToRatio(float v) {
  const float exponent = v / 20.0f;
  return std::pow(10.0f, exponent);
}
|
||||
|
||||
inline float DbfsToFloatS16(float v) {
|
||||
static constexpr float kMaximumAbsFloatS16 = -limits_int16::min();
|
||||
return DbToRatio(v) * kMaximumAbsFloatS16;
|
||||
}
|
||||
|
||||
inline float FloatS16ToDbfs(float v) {
|
||||
RTC_DCHECK_GE(v, 0);
|
||||
|
||||
// kMinDbfs is equal to -20.0 * log10(-limits_int16::min())
|
||||
static constexpr float kMinDbfs = -90.30899869919436f;
|
||||
if (v <= 1.0f) {
|
||||
return kMinDbfs;
|
||||
}
|
||||
// Equal to 20 * log10(v / (-limits_int16::min()))
|
||||
return 20.0f * std::log10(v) + kMinDbfs;
|
||||
}
|
||||
|
||||
// Copy audio from `src` channels to `dest` channels unless `src` and `dest`
// point to the same address. `src` and `dest` must have the same number of
// channels, and there must be sufficient space allocated in `dest`.
// TODO: b/335805780 - Accept ArrayView.
template <typename T>
void CopyAudioIfNeeded(const T* const* src,
                       int num_frames,
                       int num_channels,
                       T* const* dest) {
  for (int ch = 0; ch < num_channels; ++ch) {
    const T* from = src[ch];
    T* to = dest[ch];
    if (from == to) {
      continue;  // Source and destination alias; nothing to do.
    }
    std::copy(from, from + num_frames, to);
  }
}
|
||||
|
||||
// Deinterleave audio from `interleaved` to the channel buffers pointed to
|
||||
// by `deinterleaved`. There must be sufficient space allocated in the
|
||||
// `deinterleaved` buffers (`num_channel` buffers with `samples_per_channel`
|
||||
// per buffer).
|
||||
template <typename T>
|
||||
void Deinterleave(const InterleavedView<const T>& interleaved,
|
||||
const DeinterleavedView<T>& deinterleaved) {
|
||||
RTC_DCHECK_EQ(NumChannels(interleaved), NumChannels(deinterleaved));
|
||||
RTC_DCHECK_EQ(SamplesPerChannel(interleaved),
|
||||
SamplesPerChannel(deinterleaved));
|
||||
const auto num_channels = NumChannels(interleaved);
|
||||
const auto samples_per_channel = SamplesPerChannel(interleaved);
|
||||
for (size_t i = 0; i < num_channels; ++i) {
|
||||
MonoView<T> channel = deinterleaved[i];
|
||||
size_t interleaved_idx = i;
|
||||
for (size_t j = 0; j < samples_per_channel; ++j) {
|
||||
channel[j] = interleaved[interleaved_idx];
|
||||
interleaved_idx += num_channels;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Interleave audio from the channel buffers pointed to by `deinterleaved` to
|
||||
// `interleaved`. There must be sufficient space allocated in `interleaved`
|
||||
// (`samples_per_channel` * `num_channels`).
|
||||
template <typename T>
|
||||
void Interleave(const DeinterleavedView<const T>& deinterleaved,
|
||||
const InterleavedView<T>& interleaved) {
|
||||
RTC_DCHECK_EQ(NumChannels(interleaved), NumChannels(deinterleaved));
|
||||
RTC_DCHECK_EQ(SamplesPerChannel(interleaved),
|
||||
SamplesPerChannel(deinterleaved));
|
||||
for (size_t i = 0; i < deinterleaved.num_channels(); ++i) {
|
||||
const auto channel = deinterleaved[i];
|
||||
size_t interleaved_idx = i;
|
||||
for (size_t j = 0; j < deinterleaved.samples_per_channel(); ++j) {
|
||||
interleaved[interleaved_idx] = channel[j];
|
||||
interleaved_idx += deinterleaved.num_channels();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Downmixes an interleaved multichannel signal to a single channel by averaging
|
||||
// all channels.
|
||||
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
|
||||
template <typename T, typename Intermediate>
|
||||
void DownmixInterleavedToMonoImpl(const T* interleaved,
|
||||
size_t num_frames,
|
||||
int num_channels,
|
||||
T* deinterleaved) {
|
||||
RTC_DCHECK_GT(num_channels, 0);
|
||||
RTC_DCHECK_GT(num_frames, 0);
|
||||
|
||||
const T* const end = interleaved + num_frames * num_channels;
|
||||
|
||||
while (interleaved < end) {
|
||||
const T* const frame_end = interleaved + num_channels;
|
||||
|
||||
Intermediate value = *interleaved++;
|
||||
while (interleaved < frame_end) {
|
||||
value += *interleaved++;
|
||||
}
|
||||
|
||||
*deinterleaved++ = value / num_channels;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
|
||||
template <typename T>
|
||||
void DownmixInterleavedToMono(const T* interleaved,
|
||||
size_t num_frames,
|
||||
int num_channels,
|
||||
T* deinterleaved);
|
||||
|
||||
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
|
||||
template <>
|
||||
void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved,
|
||||
size_t num_frames,
|
||||
int num_channels,
|
||||
int16_t* deinterleaved);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
|
102
VocieProcess/common_audio/resampler/push_sinc_resampler.cc
Normal file
102
VocieProcess/common_audio/resampler/push_sinc_resampler.cc
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "common_audio/resampler/push_sinc_resampler.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Wraps SincResampler behind a push API: `source_frames` in per call,
// `destination_frames` out per call. The io_sample_rate_ratio passed to
// SincResampler is source_frames / destination_frames.
PushSincResampler::PushSincResampler(size_t source_frames,
                                     size_t destination_frames)
    : resampler_(new SincResampler(source_frames * 1.0 / destination_frames,
                                   source_frames,
                                   this)),  // `this` provides the Run() callback.
      source_ptr_(nullptr),
      source_ptr_int_(nullptr),
      destination_frames_(destination_frames),
      first_pass_(true),
      source_available_(0) {}

PushSincResampler::~PushSincResampler() {}
|
||||
|
||||
// int16 entry point: resamples through an internal float buffer using the
// float overload, then converts back with FloatS16ToS16(). Returns the
// number of samples written (`destination_frames_`).
size_t PushSincResampler::Resample(const int16_t* source,
                                   size_t source_length,
                                   int16_t* destination,
                                   size_t destination_capacity) {
  // Lazily allocate the intermediate float buffer on first use.
  if (!float_buffer_.get())
    float_buffer_.reset(new float[destination_frames_]);

  source_ptr_int_ = source;
  // Pass nullptr as the float source to have Run() read from the int16 source.
  Resample(nullptr, source_length, float_buffer_.get(), destination_frames_);
  FloatS16ToS16(float_buffer_.get(), destination_frames_, destination);
  source_ptr_int_ = nullptr;
  return destination_frames_;
}
|
||||
|
||||
// Float entry point. `source_length` must equal the resampler's fixed
// request size. `source` may be nullptr when called from the int16
// overload, in which case Run() reads from `source_ptr_int_` instead.
// Returns the number of samples written (`destination_frames_`).
size_t PushSincResampler::Resample(const float* source,
                                   size_t source_length,
                                   float* destination,
                                   size_t destination_capacity) {
  RTC_CHECK_EQ(source_length, resampler_->request_frames());
  RTC_CHECK_GE(destination_capacity, destination_frames_);
  // Cache the source pointer. Calling Resample() will immediately trigger
  // the Run() callback whereupon we provide the cached value.
  source_ptr_ = source;
  source_available_ = source_length;

  // On the first pass, we call Resample() twice. During the first call, we
  // provide dummy input and discard the output. This is done to prime the
  // SincResampler buffer with the correct delay (half the kernel size), thereby
  // ensuring that all later Resample() calls will only result in one input
  // request through Run().
  //
  // If this wasn't done, SincResampler would call Run() twice on the first
  // pass, and we'd have to introduce an entire `source_frames` of delay, rather
  // than the minimum half kernel.
  //
  // It works out that ChunkSize() is exactly the amount of output we need to
  // request in order to prime the buffer with a single Run() request for
  // `source_frames`.
  if (first_pass_)
    resampler_->Resample(resampler_->ChunkSize(), destination);

  resampler_->Resample(destination_frames_, destination);
  source_ptr_ = nullptr;
  return destination_frames_;
}
|
||||
|
||||
// SincResamplerCallback implementation: supplies `frames` input samples to
// the underlying resampler. On the priming pass it writes silence; afterwards
// it copies from whichever cached source pointer (float or int16) the active
// Resample() overload set.
void PushSincResampler::Run(size_t frames, float* destination) {
  // Ensure we are only asked for the available samples. This would fail if
  // Run() was triggered more than once per Resample() call.
  RTC_CHECK_EQ(source_available_, frames);

  if (first_pass_) {
    // Provide dummy input on the first pass, the output of which will be
    // discarded, as described in Resample().
    std::memset(destination, 0, frames * sizeof(*destination));
    first_pass_ = false;
    return;
  }

  if (source_ptr_) {
    // Float path: straight copy.
    std::memcpy(destination, source_ptr_, frames * sizeof(*destination));
  } else {
    // Int16 path: widen each sample to float (no rescaling here; the
    // conversion back is done by FloatS16ToS16 in the int16 overload).
    for (size_t i = 0; i < frames; ++i)
      destination[i] = static_cast<float>(source_ptr_int_[i]);
  }
  source_available_ -= frames;
}
|
||||
|
||||
} // namespace webrtc
|
88
VocieProcess/common_audio/resampler/push_sinc_resampler.h
Normal file
88
VocieProcess/common_audio/resampler/push_sinc_resampler.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
|
||||
#define COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "common_audio/resampler/sinc_resampler.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A thin wrapper over SincResampler to provide a push-based interface as
|
||||
// required by WebRTC. SincResampler uses a pull-based interface, and will
|
||||
// use SincResamplerCallback::Run() to request data upon a call to Resample().
|
||||
// These Run() calls will happen on the same thread Resample() is called on.
|
||||
// A thin wrapper over SincResampler to provide a push-based interface as
// required by WebRTC. SincResampler uses a pull-based interface, and will
// use SincResamplerCallback::Run() to request data upon a call to Resample().
// These Run() calls will happen on the same thread Resample() is called on.
class PushSincResampler : public SincResamplerCallback {
 public:
  // Provide the size of the source and destination blocks in samples. These
  // must correspond to the same time duration (typically 10 ms) as the sample
  // ratio is inferred from them.
  PushSincResampler(size_t source_frames, size_t destination_frames);
  ~PushSincResampler() override;

  PushSincResampler(const PushSincResampler&) = delete;
  PushSincResampler& operator=(const PushSincResampler&) = delete;

  // Convenience overload taking mono views; forwards to the raw-pointer
  // overloads below using the views' sample counts.
  template <typename S, typename D>
  size_t Resample(const MonoView<S>& source, const MonoView<D>& destination) {
    return Resample(&source[0], SamplesPerChannel(source), &destination[0],
                    SamplesPerChannel(destination));
  }

  // Perform the resampling. `source_frames` must always equal the
  // `source_frames` provided at construction. `destination_capacity` must be
  // at least as large as `destination_frames`. Returns the number of samples
  // provided in destination (for convenience, since this will always be equal
  // to `destination_frames`).
  size_t Resample(const int16_t* source,
                  size_t source_frames,
                  int16_t* destination,
                  size_t destination_capacity);
  size_t Resample(const float* source,
                  size_t source_frames,
                  float* destination,
                  size_t destination_capacity);

  // Delay due to the filter kernel. Essentially, the time after which an input
  // sample will appear in the resampled output.
  static float AlgorithmicDelaySeconds(int source_rate_hz) {
    return 1.f / source_rate_hz * SincResampler::kKernelSize / 2;
  }

 protected:
  // Implements SincResamplerCallback. Feeds cached source samples (or
  // silence, on the priming pass) to the underlying SincResampler.
  void Run(size_t frames, float* destination) override;

 private:
  friend class PushSincResamplerTest;
  SincResampler* get_resampler_for_testing() { return resampler_.get(); }

  std::unique_ptr<SincResampler> resampler_;
  // Scratch buffer for the int16 overload; allocated lazily.
  std::unique_ptr<float[]> float_buffer_;
  // Non-owning source pointers, valid only during a Resample() call.
  const float* source_ptr_;
  const int16_t* source_ptr_int_;
  const size_t destination_frames_;

  // True on the first call to Resample(), to prime the SincResampler buffer.
  bool first_pass_;

  // Used to assert we are only requested for as much data as is available.
  size_t source_available_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
|
366
VocieProcess/common_audio/resampler/sinc_resampler.cc
Normal file
366
VocieProcess/common_audio/resampler/sinc_resampler.cc
Normal file
@ -0,0 +1,366 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Modified from the Chromium original:
|
||||
// src/media/base/sinc_resampler.cc
|
||||
|
||||
// Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_
|
||||
// and r4_ will move after the first load):
|
||||
//
|
||||
// |----------------|-----------------------------------------|----------------|
|
||||
//
|
||||
// request_frames_
|
||||
// <--------------------------------------------------------->
|
||||
// r0_ (during first load)
|
||||
//
|
||||
// kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2
|
||||
// <---------------> <---------------> <---------------> <--------------->
|
||||
// r1_ r2_ r3_ r4_
|
||||
//
|
||||
// block_size_ == r4_ - r2_
|
||||
// <--------------------------------------->
|
||||
//
|
||||
// request_frames_
|
||||
// <------------------ ... ----------------->
|
||||
// r0_ (during second load)
|
||||
//
|
||||
// On the second request r0_ slides to the right by kKernelSize / 2 and r3_, r4_
|
||||
// and block_size_ are reinitialized via step (3) in the algorithm below.
|
||||
//
|
||||
// These new regions remain constant until a Flush() occurs. While complicated,
|
||||
// this allows us to reduce jitter by always requesting the same amount from the
|
||||
// provided callback.
|
||||
//
|
||||
// The algorithm:
|
||||
//
|
||||
// 1) Allocate input_buffer of size: request_frames_ + kKernelSize; this ensures
|
||||
// there's enough room to read request_frames_ from the callback into region
|
||||
// r0_ (which will move between the first and subsequent passes).
|
||||
//
|
||||
// 2) Let r1_, r2_ each represent half the kernel centered around r0_:
|
||||
//
|
||||
// r0_ = input_buffer_ + kKernelSize / 2
|
||||
// r1_ = input_buffer_
|
||||
// r2_ = r0_
|
||||
//
|
||||
// r0_ is always request_frames_ in size. r1_, r2_ are kKernelSize / 2 in
|
||||
// size. r1_ must be zero initialized to avoid convolution with garbage (see
|
||||
// step (5) for why).
|
||||
//
|
||||
// 3) Let r3_, r4_ each represent half the kernel right aligned with the end of
|
||||
// r0_ and choose block_size_ as the distance in frames between r4_ and r2_:
|
||||
//
|
||||
// r3_ = r0_ + request_frames_ - kKernelSize
|
||||
// r4_ = r0_ + request_frames_ - kKernelSize / 2
|
||||
// block_size_ = r4_ - r2_ = request_frames_ - kKernelSize / 2
|
||||
//
|
||||
// 4) Consume request_frames_ frames into r0_.
|
||||
//
|
||||
// 5) Position kernel centered at start of r2_ and generate output frames until
|
||||
// the kernel is centered at the start of r4_ or we've finished generating
|
||||
// all the output frames.
|
||||
//
|
||||
// 6) Wrap left over data from the r3_ to r1_ and r4_ to r2_.
|
||||
//
|
||||
// 7) If we're on the second load, in order to avoid overwriting the frames we
|
||||
// just wrapped from r4_ we need to slide r0_ to the right by the size of
|
||||
// r4_, which is kKernelSize / 2:
|
||||
//
|
||||
// r0_ = r0_ + kKernelSize / 2 = input_buffer_ + kKernelSize
|
||||
//
|
||||
// r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).
|
||||
//
|
||||
// 8) Else, if we're not on the second load, goto (4).
|
||||
//
|
||||
// Note: we're glossing over how the sub-sample handling works with
|
||||
// `virtual_source_idx_`, etc.
|
||||
|
||||
// MSVC++ requires this to be set before any other includes to get M_PI.
|
||||
#define _USE_MATH_DEFINES
|
||||
|
||||
#include "common_audio/resampler/sinc_resampler.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
#include "system_wrappers/include/cpu_features_wrapper.h" // kSSE2, WebRtc_G...
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the normalized cutoff frequency of the low-pass filter used when
// building the sinc kernel. Downsampling (io_ratio > 1) lowers the cutoff to
// 1/io_ratio; upsampling leaves it at 1.0. Because the windowed sinc is not
// an ideal brick-wall filter, the cutoff is nudged down by an empirical 0.9
// factor to reduce aliasing at the high end.
// TODO(crogers): this value is empirical and to be more exact should vary
// depending on kKernelSize.
double SincScaleFactor(double io_ratio) {
  const double cutoff = (io_ratio > 1.0) ? (1.0 / io_ratio) : 1.0;
  return cutoff * 0.9;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
const size_t SincResampler::kKernelSize;
|
||||
|
||||
// If we know the minimum architecture at compile time, avoid CPU detection.
|
||||
// Selects the convolution routine at runtime based on compile-time target
// and (on x86) CPUID feature detection. If we know the minimum architecture
// at compile time, avoid CPU detection.
void SincResampler::InitializeCPUSpecificFeatures() {
#if defined(WEBRTC_HAS_NEON)
  // NEON availability is guaranteed by the build target; no runtime check.
  convolve_proc_ = Convolve_NEON;
#elif defined(WEBRTC_ARCH_X86_FAMILY)
  // Using AVX2 instead of SSE2 when AVX2/FMA3 supported.
  if (GetCPUInfo(kAVX2) && GetCPUInfo(kFMA3))
    convolve_proc_ = Convolve_AVX2;
  else if (GetCPUInfo(kSSE2))
    convolve_proc_ = Convolve_SSE;
  else
    convolve_proc_ = Convolve_C;
#else
  // Unknown architecture: fall back to the portable C implementation.
  convolve_proc_ = Convolve_C;
#endif
}
|
||||
|
||||
// Constructs the resampler. `io_sample_rate_ratio` is input/output rate;
// `request_frames` is how many input frames each read_cb->Run() call must
// provide. All kernel/input buffers are 32-byte aligned so the SIMD
// Convolve_* variants can assume aligned loads.
SincResampler::SincResampler(double io_sample_rate_ratio,
                             size_t request_frames,
                             SincResamplerCallback* read_cb)
    : io_sample_rate_ratio_(io_sample_rate_ratio),
      read_cb_(read_cb),
      request_frames_(request_frames),
      // Extra kKernelSize samples leave room for the half-kernel lead-in
      // (r1_) and the wrapped tail (see the region diagram at the top).
      input_buffer_size_(request_frames_ + kKernelSize),
      // Create input buffers with a 32-byte alignment for SIMD optimizations.
      kernel_storage_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
      kernel_pre_sinc_storage_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
      kernel_window_storage_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
      input_buffer_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * input_buffer_size_, 32))),
      convolve_proc_(nullptr),
      r1_(input_buffer_.get()),
      r2_(input_buffer_.get() + kKernelSize / 2) {
  InitializeCPUSpecificFeatures();
  RTC_DCHECK(convolve_proc_);
  RTC_DCHECK_GT(request_frames_, 0);
  // Flush() zeroes the input buffer and sets up r0_/r3_/r4_/block_size_
  // via UpdateRegions(false).
  Flush();
  RTC_DCHECK_GT(block_size_, kKernelSize);

  // Zero all kernel storage before InitializeKernel() fills it in.
  memset(kernel_storage_.get(), 0,
         sizeof(*kernel_storage_.get()) * kKernelStorageSize);
  memset(kernel_pre_sinc_storage_.get(), 0,
         sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
  memset(kernel_window_storage_.get(), 0,
         sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);

  InitializeKernel();
}
|
||||
|
||||
// All storage is owned by AlignedFree-deleting unique_ptrs; nothing to do.
SincResampler::~SincResampler() = default;
|
||||
|
||||
void SincResampler::UpdateRegions(bool second_load) {
|
||||
// Setup various region pointers in the buffer (see diagram above). If we're
|
||||
// on the second load we need to slide r0_ to the right by kKernelSize / 2.
|
||||
r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);
|
||||
r3_ = r0_ + request_frames_ - kKernelSize;
|
||||
r4_ = r0_ + request_frames_ - kKernelSize / 2;
|
||||
block_size_ = r4_ - r2_;
|
||||
|
||||
// r1_ at the beginning of the buffer.
|
||||
RTC_DCHECK_EQ(r1_, input_buffer_.get());
|
||||
// r1_ left of r2_, r4_ left of r3_ and size correct.
|
||||
RTC_DCHECK_EQ(r2_ - r1_, r4_ - r3_);
|
||||
// r2_ left of r3.
|
||||
RTC_DCHECK_LT(r2_, r3_);
|
||||
}
|
||||
|
||||
// Builds kKernelOffsetCount+1 windowed-sinc kernels, one per sub-sample
// offset in [0.0, 1.0]. The pre-sinc arguments and Blackman window values
// are stored separately so SetRatio() can rebuild the kernels cheaply.
void SincResampler::InitializeKernel() {
  // Blackman window parameters.
  static const double kAlpha = 0.16;
  static const double kA0 = 0.5 * (1.0 - kAlpha);
  static const double kA1 = 0.5;
  static const double kA2 = 0.5 * kAlpha;

  // Generates a set of windowed sinc() kernels.
  // We generate a range of sub-sample offsets from 0.0 to 1.0.
  const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
  for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
    const float subsample_offset =
        static_cast<float>(offset_idx) / kKernelOffsetCount;

    for (size_t i = 0; i < kKernelSize; ++i) {
      const size_t idx = i + offset_idx * kKernelSize;
      // Sinc argument, centered on the middle of the kernel and shifted by
      // the sub-sample offset. Cached for SetRatio().
      const float pre_sinc = static_cast<float>(
          M_PI * (static_cast<int>(i) - static_cast<int>(kKernelSize / 2) -
                  subsample_offset));
      kernel_pre_sinc_storage_[idx] = pre_sinc;

      // Compute Blackman window, matching the offset of the sinc().
      const float x = (i - subsample_offset) / kKernelSize;
      const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +
                                              kA2 * cos(4.0 * M_PI * x));
      kernel_window_storage_[idx] = window;

      // Compute the sinc with offset, then window the sinc() function and store
      // at the correct offset. The pre_sinc == 0 case is the sinc(0) limit.
      kernel_storage_[idx] = static_cast<float>(
          window * ((pre_sinc == 0)
                        ? sinc_scale_factor
                        : (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
    }
  }
}
|
||||
|
||||
// Updates the I/O ratio and rebuilds the kernels. Not thread safe; do not
// call while Resample() is in progress.
void SincResampler::SetRatio(double io_sample_rate_ratio) {
  // No-op when the ratio is (numerically) unchanged.
  if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <
      std::numeric_limits<double>::epsilon()) {
    return;
  }

  io_sample_rate_ratio_ = io_sample_rate_ratio;

  // Optimize reinitialization by reusing values which are independent of
  // `sinc_scale_factor` (the cached pre-sinc arguments and window values
  // computed by InitializeKernel()). Provides a 3x speedup.
  const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
  for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
    for (size_t i = 0; i < kKernelSize; ++i) {
      const size_t idx = i + offset_idx * kKernelSize;
      const float window = kernel_window_storage_[idx];
      const float pre_sinc = kernel_pre_sinc_storage_[idx];

      // Same formula as InitializeKernel(); pre_sinc == 0 is the sinc(0)
      // limit.
      kernel_storage_[idx] = static_cast<float>(
          window * ((pre_sinc == 0)
                        ? sinc_scale_factor
                        : (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
    }
  }
}
|
||||
|
||||
// Produces `frames` output samples into `destination`, pulling input from
// `read_cb_` in request_frames_-sized chunks as needed. See the numbered
// algorithm description at the top of this file.
void SincResampler::Resample(size_t frames, float* destination) {
  size_t remaining_frames = frames;

  // Step (1) -- Prime the input buffer at the start of the input stream.
  if (!buffer_primed_ && remaining_frames) {
    read_cb_->Run(request_frames_, r0_);
    buffer_primed_ = true;
  }

  // Step (2) -- Resample! const what we can outside of the loop for speed. It
  // actually has an impact on ARM performance. See inner loop comment below.
  const double current_io_ratio = io_sample_rate_ratio_;
  const float* const kernel_ptr = kernel_storage_.get();
  while (remaining_frames) {
    // `i` may be negative if the last Resample() call ended on an iteration
    // that put `virtual_source_idx_` over the limit.
    //
    // Note: The loop construct here can severely impact performance on ARM
    // or when built with clang. See https://codereview.chromium.org/18566009/
    for (int i = static_cast<int>(
             ceil((block_size_ - virtual_source_idx_) / current_io_ratio));
         i > 0; --i) {
      RTC_DCHECK_LT(virtual_source_idx_, block_size_);

      // `virtual_source_idx_` lies in between two kernel offsets so figure out
      // what they are.
      const int source_idx = static_cast<int>(virtual_source_idx_);
      const double subsample_remainder = virtual_source_idx_ - source_idx;

      const double virtual_offset_idx =
          subsample_remainder * kKernelOffsetCount;
      const int offset_idx = static_cast<int>(virtual_offset_idx);

      // We'll compute "convolutions" for the two kernels which straddle
      // `virtual_source_idx_`.
      const float* const k1 = kernel_ptr + offset_idx * kKernelSize;
      const float* const k2 = k1 + kKernelSize;

      // Ensure `k1`, `k2` are 32-byte aligned for SIMD usage. Should always be
      // true so long as kKernelSize is a multiple of 32.
      RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k1) % 32);
      RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k2) % 32);

      // Initialize input pointer based on quantized `virtual_source_idx_`.
      const float* const input_ptr = r1_ + source_idx;

      // Figure out how much to weight each kernel's "convolution".
      const double kernel_interpolation_factor =
          virtual_offset_idx - offset_idx;
      *destination++ =
          convolve_proc_(input_ptr, k1, k2, kernel_interpolation_factor);

      // Advance the virtual index.
      virtual_source_idx_ += current_io_ratio;

      if (!--remaining_frames)
        return;
    }

    // Wrap back around to the start.
    virtual_source_idx_ -= block_size_;

    // Step (3) -- Copy r3_, r4_ to r1_, r2_.
    // This wraps the last input frames back to the start of the buffer.
    memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);

    // Step (4) -- Reinitialize regions if necessary.
    if (r0_ == r2_)
      UpdateRegions(true);

    // Step (5) -- Refresh the buffer with more input.
    read_cb_->Run(request_frames_, r0_);
  }
}
|
||||
|
||||
#undef CONVOLVE_FUNC
|
||||
|
||||
// Largest output request that is guaranteed to trigger at most a single
// read_cb_->Run() call: one input block converted to output frames at the
// current ratio.
size_t SincResampler::ChunkSize() const {
  const double output_frames_per_block = block_size_ / io_sample_rate_ratio_;
  return static_cast<size_t>(output_frames_per_block);
}
|
||||
|
||||
void SincResampler::Flush() {
|
||||
virtual_source_idx_ = 0;
|
||||
buffer_primed_ = false;
|
||||
memset(input_buffer_.get(), 0,
|
||||
sizeof(*input_buffer_.get()) * input_buffer_size_);
|
||||
UpdateRegions(false);
|
||||
}
|
||||
|
||||
float SincResampler::Convolve_C(const float* input_ptr,
|
||||
const float* k1,
|
||||
const float* k2,
|
||||
double kernel_interpolation_factor) {
|
||||
float sum1 = 0;
|
||||
float sum2 = 0;
|
||||
|
||||
// Generate a single output sample. Unrolling this loop hurt performance in
|
||||
// local testing.
|
||||
size_t n = kKernelSize;
|
||||
while (n--) {
|
||||
sum1 += *input_ptr * *k1++;
|
||||
sum2 += *input_ptr++ * *k2++;
|
||||
}
|
||||
|
||||
// Linearly interpolate the two "convolutions".
|
||||
return static_cast<float>((1.0 - kernel_interpolation_factor) * sum1 +
|
||||
kernel_interpolation_factor * sum2);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
181
VocieProcess/common_audio/resampler/sinc_resampler.h
Normal file
181
VocieProcess/common_audio/resampler/sinc_resampler.h
Normal file
@ -0,0 +1,181 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Modified from the Chromium original here:
|
||||
// src/media/base/sinc_resampler.h
|
||||
|
||||
#ifndef COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
|
||||
#define COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "rtc_base/gtest_prod_util.h"
|
||||
#include "rtc_base/memory/aligned_malloc.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Callback class for providing more data into the resampler. Expects `frames`
|
||||
// of data to be rendered into `destination`; zero padded if not enough frames
|
||||
// are available to satisfy the request.
|
||||
// Callback class for providing more data into the resampler. Expects `frames`
// of data to be rendered into `destination`; zero padded if not enough frames
// are available to satisfy the request. Run() is invoked synchronously from
// SincResampler::Resample(), on the caller's thread.
class SincResamplerCallback {
 public:
  virtual ~SincResamplerCallback() {}
  virtual void Run(size_t frames, float* destination) = 0;
};
|
||||
|
||||
// SincResampler is a high-quality single-channel sample-rate converter.
|
||||
// SincResampler is a high-quality single-channel sample-rate converter.
// Pull-based: Resample() requests input through the SincResamplerCallback
// supplied at construction. Not thread safe.
class SincResampler {
 public:
  // The kernel size can be adjusted for quality (higher is better) at the
  // expense of performance. Must be a multiple of 32.
  // TODO(dalecurtis): Test performance to see if we can jack this up to 64+.
  static const size_t kKernelSize = 32;

  // Default request size. Affects how often and for how much SincResampler
  // calls back for input. Must be greater than kKernelSize.
  static const size_t kDefaultRequestSize = 512;

  // The kernel offset count is used for interpolation and is the number of
  // sub-sample kernel shifts. Can be adjusted for quality (higher is better)
  // at the expense of allocating more memory.
  static const size_t kKernelOffsetCount = 32;
  // One kernel per sub-sample offset, plus one for offset == 1.0.
  static const size_t kKernelStorageSize =
      kKernelSize * (kKernelOffsetCount + 1);

  // Constructs a SincResampler with the specified `read_cb`, which is used to
  // acquire audio data for resampling. `io_sample_rate_ratio` is the ratio
  // of input / output sample rates. `request_frames` controls the size in
  // frames of the buffer requested by each `read_cb` call. The value must be
  // greater than kKernelSize. Specify kDefaultRequestSize if there are no
  // request size constraints.
  SincResampler(double io_sample_rate_ratio,
                size_t request_frames,
                SincResamplerCallback* read_cb);
  virtual ~SincResampler();

  SincResampler(const SincResampler&) = delete;
  SincResampler& operator=(const SincResampler&) = delete;

  // Resample `frames` of data from `read_cb_` into `destination`.
  void Resample(size_t frames, float* destination);

  // The maximum size in frames that guarantees Resample() will only make a
  // single call to `read_cb_` for more data.
  size_t ChunkSize() const;

  size_t request_frames() const { return request_frames_; }

  // Flush all buffered data and reset internal indices. Not thread safe, do
  // not call while Resample() is in progress.
  void Flush();

  // Update `io_sample_rate_ratio_`. SetRatio() will cause a reconstruction of
  // the kernels used for resampling. Not thread safe, do not call while
  // Resample() is in progress.
  //
  // TODO(ajm): Use this in PushSincResampler rather than reconstructing
  // SincResampler. We would also need a way to update `request_frames_`.
  void SetRatio(double io_sample_rate_ratio);

  float* get_kernel_for_testing() { return kernel_storage_.get(); }

 private:
  FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve);
  FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark);

  void InitializeKernel();
  void UpdateRegions(bool second_load);

  // Selects runtime specific CPU features like SSE. Must be called before
  // using SincResampler.
  // TODO(ajm): Currently managed by the class internally. See the note with
  // `convolve_proc_` below.
  void InitializeCPUSpecificFeatures();

  // Compute convolution of `k1` and `k2` over `input_ptr`, resultant sums are
  // linearly interpolated using `kernel_interpolation_factor`. On x86 and ARM
  // the underlying implementation is chosen at run time.
  static float Convolve_C(const float* input_ptr,
                          const float* k1,
                          const float* k2,
                          double kernel_interpolation_factor);
#if defined(WEBRTC_ARCH_X86_FAMILY)
  static float Convolve_SSE(const float* input_ptr,
                            const float* k1,
                            const float* k2,
                            double kernel_interpolation_factor);
  static float Convolve_AVX2(const float* input_ptr,
                             const float* k1,
                             const float* k2,
                             double kernel_interpolation_factor);
#elif defined(WEBRTC_HAS_NEON)
  static float Convolve_NEON(const float* input_ptr,
                             const float* k1,
                             const float* k2,
                             double kernel_interpolation_factor);
#endif

  // The ratio of input / output sample rates.
  double io_sample_rate_ratio_;

  // An index on the source input buffer with sub-sample precision. It must be
  // double precision to avoid drift.
  double virtual_source_idx_;

  // The buffer is primed once at the very beginning of processing.
  bool buffer_primed_;

  // Source of data for resampling. Non-owning; must outlive this object.
  SincResamplerCallback* read_cb_;

  // The size (in samples) to request from each `read_cb_` execution.
  const size_t request_frames_;

  // The number of source frames processed per pass.
  size_t block_size_;

  // The size (in samples) of the internal buffer used by the resampler.
  const size_t input_buffer_size_;

  // Contains kKernelOffsetCount kernels back-to-back, each of size kKernelSize.
  // The kernel offsets are sub-sample shifts of a windowed sinc shifted from
  // 0.0 to 1.0 sample.
  std::unique_ptr<float[], AlignedFreeDeleter> kernel_storage_;
  std::unique_ptr<float[], AlignedFreeDeleter> kernel_pre_sinc_storage_;
  std::unique_ptr<float[], AlignedFreeDeleter> kernel_window_storage_;

  // Data from the source is copied into this buffer for each processing pass.
  std::unique_ptr<float[], AlignedFreeDeleter> input_buffer_;

  // Stores the runtime selection of which Convolve function to use.
  // TODO(ajm): Move to using a global static which must only be initialized
  // once by the user. We're not doing this initially, because we don't have
  // e.g. a LazyInstance helper in webrtc.
  typedef float (*ConvolveProc)(const float*,
                                const float*,
                                const float*,
                                double);
  ConvolveProc convolve_proc_;

  // Pointers to the various regions inside `input_buffer_`. See the diagram at
  // the top of the .cc file for more information.
  float* r0_;
  float* const r1_;
  float* const r2_;
  float* r3_;
  float* r4_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
|
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "common_audio/signal_processing/dot_product_with_scale.h"
|
||||
|
||||
#include "rtc_base/numerics/safe_conversions.h"
|
||||
|
||||
// Computes the dot product of two int16 vectors, right-shifting each term by
// `scaling` bits to avoid accumulator overflow. The result is saturated to
// int32 range, i.e. returned in Q(-scaling).
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
                                      const int16_t* vector2,
                                      size_t length,
                                      int scaling) {
  int64_t sum = 0;
  size_t i = 0;

  /* Main loop, manually unrolled four terms per iteration for speed. */
  const size_t unrolled_length = length & ~(size_t)3;
  for (; i < unrolled_length; i += 4) {
    sum += (vector1[i] * vector2[i]) >> scaling;
    sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
    sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
    sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
  }
  /* Remaining 0-3 terms. */
  for (; i < length; i++) {
    sum += (vector1[i] * vector2[i]) >> scaling;
  }

  return rtc::saturated_cast<int32_t>(sum);
}
|
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
|
||||
#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Calculates the dot product between two (int16_t) vectors.
|
||||
//
|
||||
// Input:
|
||||
// - vector1 : Vector 1
|
||||
// - vector2 : Vector 2
|
||||
// - vector_length : Number of samples used in the dot product
|
||||
// - scaling : The number of right bit shifts to apply on each term
|
||||
// during calculation to avoid overflow, i.e., the
|
||||
// output will be in Q(-`scaling`)
|
||||
//
|
||||
// Return value : The dot product in Q(-scaling)
|
||||
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
|
||||
const int16_t* vector2,
|
||||
size_t length,
|
||||
int scaling);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
|
File diff suppressed because it is too large
Load Diff
155
VocieProcess/common_audio/signal_processing/include/spl_inl.h
Normal file
155
VocieProcess/common_audio/signal_processing/include/spl_inl.h
Normal file
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// This header file includes the inline functions in
|
||||
// the fix point signal processing library.
|
||||
|
||||
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
|
||||
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "rtc_base/compile_assert_c.h"
|
||||
|
||||
extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
|
||||
|
||||
// Portable fallback for counting leading zeros in a 32-bit value, used when
// no compiler builtin is available.
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
  // Normalize n by rounding up to the nearest number that is a sequence of 0
  // bits followed by a sequence of 1 bits. This number has the same number of
  // leading zeros as the original n. There are exactly 33 such values.
  n |= n >> 1;
  n |= n >> 2;
  n |= n >> 4;
  n |= n >> 8;
  n |= n >> 16;

  // Multiply the modified n with a constant selected (by exhaustive search)
  // such that each of the 33 possible values of n give a product whose 6 most
  // significant bits are unique. Then look up the answer in the table.
  return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26];
}
|
||||
|
||||
// Portable fallback for counting leading zeros in a 64-bit value: handles the
// high 32 bits, then delegates the remaining word to the 32-bit helper.
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
  // If the high word is all zeros, 32 leading zeros are already accounted for
  // and the low word is examined instead.
  const int leading_zeros = n >> 32 == 0 ? 32 : 0;
  return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin(
                             (uint32_t)(n >> (32 - leading_zeros)));
}
|
||||
|
||||
// Returns the number of leading zero bits in the argument.
// Uses the compiler builtin on GCC-compatible compilers (guarding against
// n == 0, for which __builtin_clz is undefined), and the table-based
// fallback elsewhere.
static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
  return n == 0 ? 32 : __builtin_clz(n);
#else
  return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
#endif
}
|
||||
|
||||
// Returns the number of leading zero bits in the argument.
// Uses the compiler builtin on GCC-compatible compilers (guarding against
// n == 0, for which __builtin_clzll is undefined), and the portable
// fallback elsewhere.
static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));  // NOLINT
  return n == 0 ? 64 : __builtin_clzll(n);
#else
  return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
#endif
}
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7
|
||||
#include "common_audio/signal_processing/include/spl_inl_armv7.h"
|
||||
#else
|
||||
|
||||
#if defined(MIPS32_LE)
|
||||
#include "common_audio/signal_processing/include/spl_inl_mips.h"
|
||||
#endif
|
||||
|
||||
#if !defined(MIPS_DSP_R1_LE)
|
||||
// Saturates a 32-bit value to the int16_t range [-32768, 32767] and returns
// it as an int16_t.
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
  // Clamp before truncating so out-of-range inputs map to the nearest bound.
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }
  return (int16_t)value32;
}
|
||||
|
||||
// Saturating 32-bit addition: returns a + b clamped to [INT32_MIN,
// INT32_MAX].
static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
  // Add as unsigned values: unsigned wrap-around is well-defined, whereas
  // signed overflow is undefined behavior.
  const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b);

  // Overflow can only happen when both operands have the same sign; it did
  // happen exactly when the result's sign differs from that shared sign.
  const int inputs_share_sign = (a < 0) == (b < 0);
  const int result_sign_flipped = (a < 0) != (sum < 0);
  if (inputs_share_sign && result_sign_flipped) {
    // A negative wrapped result means the true sum was too large, and vice
    // versa.
    return sum < 0 ? INT32_MAX : INT32_MIN;
  }
  return sum;
}
|
||||
|
||||
// Saturating 32-bit subtraction: returns a - b clamped to [INT32_MIN,
// INT32_MAX].
static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
  // Subtract as unsigned values: unsigned wrap-around is well-defined,
  // whereas signed overflow is undefined behavior.
  const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b);

  // Overflow can only happen when the operands have different signs; it did
  // happen exactly when the result's sign differs from a's sign.
  const int signs_differ = (a < 0) != (b < 0);
  const int result_sign_flipped = (a < 0) != (diff < 0);
  if (signs_differ && result_sign_flipped) {
    // A negative wrapped result means the true difference was too large,
    // and vice versa.
    return diff < 0 ? INT32_MAX : INT32_MIN;
  }
  return diff;
}
|
||||
|
||||
// Saturating 16-bit addition: the exact sum is formed in 32 bits and then
// clamped to [-32768, 32767].
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
  return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b);
}
|
||||
|
||||
// Saturating 16-bit subtraction: the exact difference is formed in 32 bits
// and then clamped to [-32768, 32767].
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
  return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2);
}
|
||||
#endif // #if !defined(MIPS_DSP_R1_LE)
|
||||
|
||||
#if !defined(MIPS32_LE)
|
||||
// Returns the number of bits needed to represent n, i.e. the position of the
// highest set bit plus one (0 when n == 0).
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
  return 32 - WebRtcSpl_CountLeadingZeros32(n);
}
|
||||
|
||||
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
  // For negative values, ~a has the same number of leading (redundant sign)
  // bits; the -1 accounts for the sign bit itself.
  return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1;
}
|
||||
|
||||
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0. Unsigned variant: every leading zero is a free shift step.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
  return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a);
}
|
||||
|
||||
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
  // Widen to 32 bits and reuse the 32-bit leading-zero count; the -17
  // compensates for the 16 extra high bits plus the sign bit.
  const int32_t a32 = a;
  return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a32 : a32) - 17;
}
|
||||
|
||||
// Multiply-accumulate: returns a * b + c. The product of two int16_t values
// always fits in an int32_t.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
  const int32_t product = a * b;
  return product + c;
}
|
||||
#endif // #if !defined(MIPS32_LE)
|
||||
|
||||
#endif // WEBRTC_ARCH_ARM_V7
|
||||
|
||||
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
|
548
VocieProcess/common_audio/third_party/ooura/fft_size_128/ooura_fft.cc
vendored
Normal file
548
VocieProcess/common_audio/third_party/ooura/fft_size_128/ooura_fft.cc
vendored
Normal file
@ -0,0 +1,548 @@
|
||||
/*
|
||||
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
|
||||
* Copyright Takuya OOURA, 1996-2001
|
||||
*
|
||||
* You may use, copy, modify and distribute this code for any purpose (include
|
||||
* commercial use) and without fee. Please refer to this package when you modify
|
||||
* this code.
|
||||
*
|
||||
* Changes by the WebRTC authors:
|
||||
* - Trivial type modifications.
|
||||
* - Minimal code subset to do rdft of length 128.
|
||||
* - Optimizations because of known length.
|
||||
* - Removed the global variables by moving the code in to a class in order
|
||||
* to make it thread safe.
|
||||
*
|
||||
* All changes are covered by the WebRTC license and IP grant:
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
|
||||
|
||||
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_common.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
#include "system_wrappers/include/cpu_features_wrapper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
|
||||
// First radix-4 butterfly stage of the 128-point complex FFT, portable C
// version. Operates in place on `a`, which holds 64 interleaved
// (real, imaginary) pairs. Twiddle factors come from the shared tables
// rdft_w / rdft_wk3ri_first / rdft_wk3ri_second.
static void cft1st_128_C(float* a) {
  const int n = 128;
  int j, k1, k2;
  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  // The processing of the first set of elements was simplified in C to avoid
  // some operations (multiplication by zero or one, addition of two elements
  // multiplied by the same weight, ...).
  x0r = a[0] + a[2];
  x0i = a[1] + a[3];
  x1r = a[0] - a[2];
  x1i = a[1] - a[3];
  x2r = a[4] + a[6];
  x2i = a[5] + a[7];
  x3r = a[4] - a[6];
  x3i = a[5] - a[7];
  a[0] = x0r + x2r;
  a[1] = x0i + x2i;
  a[4] = x0r - x2r;
  a[5] = x0i - x2i;
  a[2] = x1r - x3i;
  a[3] = x1i + x3r;
  a[6] = x1r + x3i;
  a[7] = x1i - x3r;
  // Second group: only wk1r (== rdft_w[2]) is needed; the other weights are
  // trivial for this group.
  wk1r = rdft_w[2];
  x0r = a[8] + a[10];
  x0i = a[9] + a[11];
  x1r = a[8] - a[10];
  x1i = a[9] - a[11];
  x2r = a[12] + a[14];
  x2i = a[13] + a[15];
  x3r = a[12] - a[14];
  x3i = a[13] - a[15];
  a[8] = x0r + x2r;
  a[9] = x0i + x2i;
  a[12] = x2i - x0i;
  a[13] = x0r - x2r;
  x0r = x1r - x3i;
  x0i = x1i + x3r;
  a[10] = wk1r * (x0r - x0i);
  a[11] = wk1r * (x0r + x0i);
  x0r = x3i + x1r;
  x0i = x3r - x1i;
  a[14] = wk1r * (x0i - x0r);
  a[15] = wk1r * (x0i + x0r);
  // Remaining groups: full butterflies with table-driven twiddle factors.
  k1 = 0;
  for (j = 16; j < n; j += 16) {
    k1 += 2;
    k2 = 2 * k1;
    wk2r = rdft_w[k1 + 0];
    wk2i = rdft_w[k1 + 1];
    wk1r = rdft_w[k2 + 0];
    wk1i = rdft_w[k2 + 1];
    wk3r = rdft_wk3ri_first[k1 + 0];
    wk3i = rdft_wk3ri_first[k1 + 1];
    x0r = a[j + 0] + a[j + 2];
    x0i = a[j + 1] + a[j + 3];
    x1r = a[j + 0] - a[j + 2];
    x1i = a[j + 1] - a[j + 3];
    x2r = a[j + 4] + a[j + 6];
    x2i = a[j + 5] + a[j + 7];
    x3r = a[j + 4] - a[j + 6];
    x3i = a[j + 5] - a[j + 7];
    a[j + 0] = x0r + x2r;
    a[j + 1] = x0i + x2i;
    x0r -= x2r;
    x0i -= x2i;
    a[j + 4] = wk2r * x0r - wk2i * x0i;
    a[j + 5] = wk2r * x0i + wk2i * x0r;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j + 2] = wk1r * x0r - wk1i * x0i;
    a[j + 3] = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j + 6] = wk3r * x0r - wk3i * x0i;
    a[j + 7] = wk3r * x0i + wk3i * x0r;
    // Second half of the group uses the "second" twiddle set.
    wk1r = rdft_w[k2 + 2];
    wk1i = rdft_w[k2 + 3];
    wk3r = rdft_wk3ri_second[k1 + 0];
    wk3i = rdft_wk3ri_second[k1 + 1];
    x0r = a[j + 8] + a[j + 10];
    x0i = a[j + 9] + a[j + 11];
    x1r = a[j + 8] - a[j + 10];
    x1i = a[j + 9] - a[j + 11];
    x2r = a[j + 12] + a[j + 14];
    x2i = a[j + 13] + a[j + 15];
    x3r = a[j + 12] - a[j + 14];
    x3i = a[j + 13] - a[j + 15];
    a[j + 8] = x0r + x2r;
    a[j + 9] = x0i + x2i;
    x0r -= x2r;
    x0i -= x2i;
    a[j + 12] = -wk2i * x0r - wk2r * x0i;
    a[j + 13] = -wk2i * x0i + wk2r * x0r;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j + 10] = wk1r * x0r - wk1i * x0i;
    a[j + 11] = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j + 14] = wk3r * x0r - wk3i * x0i;
    a[j + 15] = wk3r * x0i + wk3i * x0r;
  }
}
|
||||
|
||||
// Middle radix-4 butterfly stage of the 128-point complex FFT, portable C
// version. Operates in place on `a` (64 interleaved complex values), using
// the shared twiddle tables rdft_w / rdft_wk3ri_first / rdft_wk3ri_second.
static void cftmdl_128_C(float* a) {
  const int l = 8;
  const int n = 128;
  const int m = 32;
  int j0, j1, j2, j3, k, k1, k2, m2;
  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  // First group: trivial twiddles (no multiplications needed).
  for (j0 = 0; j0 < l; j0 += 2) {
    j1 = j0 + 8;
    j2 = j0 + 16;
    j3 = j0 + 24;
    x0r = a[j0 + 0] + a[j1 + 0];
    x0i = a[j0 + 1] + a[j1 + 1];
    x1r = a[j0 + 0] - a[j1 + 0];
    x1i = a[j0 + 1] - a[j1 + 1];
    x2r = a[j2 + 0] + a[j3 + 0];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2 + 0] - a[j3 + 0];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j0 + 0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j2 + 0] = x0r - x2r;
    a[j2 + 1] = x0i - x2i;
    a[j1 + 0] = x1r - x3i;
    a[j1 + 1] = x1i + x3r;
    a[j3 + 0] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
  }
  // Second group: only wk1r (== rdft_w[2]) is needed.
  wk1r = rdft_w[2];
  for (j0 = m; j0 < l + m; j0 += 2) {
    j1 = j0 + 8;
    j2 = j0 + 16;
    j3 = j0 + 24;
    x0r = a[j0 + 0] + a[j1 + 0];
    x0i = a[j0 + 1] + a[j1 + 1];
    x1r = a[j0 + 0] - a[j1 + 0];
    x1i = a[j0 + 1] - a[j1 + 1];
    x2r = a[j2 + 0] + a[j3 + 0];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2 + 0] - a[j3 + 0];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j0 + 0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j2 + 0] = x2i - x0i;
    a[j2 + 1] = x0r - x2r;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j1 + 0] = wk1r * (x0r - x0i);
    a[j1 + 1] = wk1r * (x0r + x0i);
    x0r = x3i + x1r;
    x0i = x3r - x1i;
    a[j3 + 0] = wk1r * (x0i - x0r);
    a[j3 + 1] = wk1r * (x0i + x0r);
  }
  // Remaining groups: full butterflies with table-driven twiddle factors.
  k1 = 0;
  m2 = 2 * m;
  for (k = m2; k < n; k += m2) {
    k1 += 2;
    k2 = 2 * k1;
    wk2r = rdft_w[k1 + 0];
    wk2i = rdft_w[k1 + 1];
    wk1r = rdft_w[k2 + 0];
    wk1i = rdft_w[k2 + 1];
    wk3r = rdft_wk3ri_first[k1 + 0];
    wk3i = rdft_wk3ri_first[k1 + 1];
    for (j0 = k; j0 < l + k; j0 += 2) {
      j1 = j0 + 8;
      j2 = j0 + 16;
      j3 = j0 + 24;
      x0r = a[j0 + 0] + a[j1 + 0];
      x0i = a[j0 + 1] + a[j1 + 1];
      x1r = a[j0 + 0] - a[j1 + 0];
      x1i = a[j0 + 1] - a[j1 + 1];
      x2r = a[j2 + 0] + a[j3 + 0];
      x2i = a[j2 + 1] + a[j3 + 1];
      x3r = a[j2 + 0] - a[j3 + 0];
      x3i = a[j2 + 1] - a[j3 + 1];
      a[j0 + 0] = x0r + x2r;
      a[j0 + 1] = x0i + x2i;
      x0r -= x2r;
      x0i -= x2i;
      a[j2 + 0] = wk2r * x0r - wk2i * x0i;
      a[j2 + 1] = wk2r * x0i + wk2i * x0r;
      x0r = x1r - x3i;
      x0i = x1i + x3r;
      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
      x0r = x1r + x3i;
      x0i = x1i - x3r;
      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    }
    // Second half of the group uses the "second" twiddle set.
    wk1r = rdft_w[k2 + 2];
    wk1i = rdft_w[k2 + 3];
    wk3r = rdft_wk3ri_second[k1 + 0];
    wk3i = rdft_wk3ri_second[k1 + 1];
    for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
      j1 = j0 + 8;
      j2 = j0 + 16;
      j3 = j0 + 24;
      x0r = a[j0 + 0] + a[j1 + 0];
      x0i = a[j0 + 1] + a[j1 + 1];
      x1r = a[j0 + 0] - a[j1 + 0];
      x1i = a[j0 + 1] - a[j1 + 1];
      x2r = a[j2 + 0] + a[j3 + 0];
      x2i = a[j2 + 1] + a[j3 + 1];
      x3r = a[j2 + 0] - a[j3 + 0];
      x3i = a[j2 + 1] - a[j3 + 1];
      a[j0 + 0] = x0r + x2r;
      a[j0 + 1] = x0i + x2i;
      x0r -= x2r;
      x0i -= x2i;
      a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
      a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
      x0r = x1r - x3i;
      x0i = x1i + x3r;
      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
      x0r = x1r + x3i;
      x0i = x1i - x3r;
      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    }
  }
}
|
||||
|
||||
// Post-processing step of the forward real FFT (portable C version):
// converts the 128-point complex FFT output in `a` into the spectrum of the
// 128 real input samples, in place. Weights are read from the tail of the
// shared rdft_w table.
static void rftfsub_128_C(float* a) {
  const float* c = rdft_w + 32;
  int j1, j2, k1, k2;
  float wkr, wki, xr, xi, yr, yi;

  // Combine mirrored bin pairs (j2, k2 = 128 - j2).
  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
    k2 = 128 - j2;
    k1 = 32 - j1;
    wkr = 0.5f - c[k1];
    wki = c[j1];
    xr = a[j2 + 0] - a[k2 + 0];
    xi = a[j2 + 1] + a[k2 + 1];
    yr = wkr * xr - wki * xi;
    yi = wkr * xi + wki * xr;
    a[j2 + 0] -= yr;
    a[j2 + 1] -= yi;
    a[k2 + 0] += yr;
    a[k2 + 1] -= yi;
  }
}
|
||||
|
||||
// Pre-processing step of the inverse real FFT (portable C version): the
// counterpart of rftfsub_128_C, run before the inverse complex transform.
// Note it also negates a[1] and a[65] around the mirrored-pair loop.
static void rftbsub_128_C(float* a) {
  const float* c = rdft_w + 32;
  int j1, j2, k1, k2;
  float wkr, wki, xr, xi, yr, yi;

  a[1] = -a[1];
  // Combine mirrored bin pairs (j2, k2 = 128 - j2) with conjugated weights
  // relative to the forward step.
  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
    k2 = 128 - j2;
    k1 = 32 - j1;
    wkr = 0.5f - c[k1];
    wki = c[j1];
    xr = a[j2 + 0] - a[k2 + 0];
    xi = a[j2 + 1] + a[k2 + 1];
    yr = wkr * xr + wki * xi;
    yi = wkr * xi - wki * xr;
    a[j2 + 0] = a[j2 + 0] - yr;
    a[j2 + 1] = yi - a[j2 + 1];
    a[k2 + 0] = yr + a[k2 + 0];
    a[k2 + 1] = yi - a[k2 + 1];
  }
  a[65] = -a[65];
}
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
// Constructs an OouraFft with SSE2 availability supplied by the caller.
// The flag is only honored on x86-family builds; on all other
// architectures use_sse2_ is forced to false.
OouraFft::OouraFft(bool sse2_available) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  use_sse2_ = sse2_available;
#else
  use_sse2_ = false;
#endif
}
|
||||
|
||||
// Deprecated default constructor: detects SSE2 support at runtime via
// GetCPUInfo on x86-family builds; elsewhere use_sse2_ is forced to false.
OouraFft::OouraFft() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  use_sse2_ = (GetCPUInfo(kSSE2) != 0);
#else
  use_sse2_ = false;
#endif
}
|
||||
|
||||
OouraFft::~OouraFft() = default;
|
||||
|
||||
// In-place forward transform of the 128 floats in `a`: bit-reversal
// permutation, complex FFT butterflies, then the real-FFT post-processing
// step. Finally a[0]/a[1] are repacked as their sum and difference.
void OouraFft::Fft(float* a) const {
  float xi;
  bitrv2_128(a);
  cftfsub_128(a);
  rftfsub_128(a);
  // Pack: a[0] <- a[0] + a[1], a[1] <- old a[0] - old a[1].
  xi = a[0] - a[1];
  a[0] += a[1];
  a[1] = xi;
}
|
||||
// In-place inverse of Fft(): first undoes the a[0]/a[1] packing, then runs
// the real-FFT pre-processing, bit-reversal permutation, and inverse
// complex butterflies.
void OouraFft::InverseFft(float* a) const {
  // Unpack: a[1] <- (a[0] - a[1]) / 2, a[0] <- a[0] - new a[1].
  a[1] = 0.5f * (a[0] - a[1]);
  a[0] -= a[1];
  rftbsub_128(a);
  bitrv2_128(a);
  cftbsub_128(a);
}
|
||||
|
||||
// Dispatches the first butterfly stage to the best available
// implementation: MIPS FPU or NEON when compiled in, SSE2 when enabled at
// runtime, otherwise the portable C version.
void OouraFft::cft1st_128(float* a) const {
#if defined(MIPS_FPU_LE)
  cft1st_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  cft1st_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    cft1st_128_SSE2(a);
  } else {
    cft1st_128_C(a);
  }
#else
  cft1st_128_C(a);
#endif
}
|
||||
// Dispatches the middle butterfly stage to the best available
// implementation (MIPS / NEON / SSE2 / portable C), mirroring
// cft1st_128's selection logic.
void OouraFft::cftmdl_128(float* a) const {
#if defined(MIPS_FPU_LE)
  cftmdl_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  cftmdl_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    cftmdl_128_SSE2(a);
  } else {
    cftmdl_128_C(a);
  }
#else
  cftmdl_128_C(a);
#endif
}
|
||||
// Dispatches the forward real-FFT post-processing step to the best
// available implementation (MIPS / NEON / SSE2 / portable C).
void OouraFft::rftfsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
  rftfsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  rftfsub_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    rftfsub_128_SSE2(a);
  } else {
    rftfsub_128_C(a);
  }
#else
  rftfsub_128_C(a);
#endif
}
|
||||
|
||||
// Dispatches the inverse real-FFT pre-processing step to the best
// available implementation (MIPS / NEON / SSE2 / portable C).
void OouraFft::rftbsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
  rftbsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  rftbsub_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    rftbsub_128_SSE2(a);
  } else {
    rftbsub_128_C(a);
  }
#else
  rftbsub_128_C(a);
#endif
}
|
||||
|
||||
// Inverse complex FFT driver: runs the shared first and middle butterfly
// stages, then a final radix-4 stage. Unlike cftfsub_128, the imaginary
// parts entering this last stage are negated (x0i/x1i use -a[j + 1]),
// which is what makes this the inverse-direction variant.
void OouraFft::cftbsub_128(float* a) const {
  int j, j1, j2, j3, l;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  cft1st_128(a);
  cftmdl_128(a);
  l = 32;

  for (j = 0; j < l; j += 2) {
    j1 = j + l;
    j2 = j1 + l;
    j3 = j2 + l;
    x0r = a[j] + a[j1];
    x0i = -a[j + 1] - a[j1 + 1];
    x1r = a[j] - a[j1];
    x1i = -a[j + 1] + a[j1 + 1];
    x2r = a[j2] + a[j3];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2] - a[j3];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j] = x0r + x2r;
    a[j + 1] = x0i - x2i;
    a[j2] = x0r - x2r;
    a[j2 + 1] = x0i + x2i;
    a[j1] = x1r - x3i;
    a[j1 + 1] = x1i - x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i + x3r;
  }
}
|
||||
|
||||
// Forward complex FFT driver: runs the shared first and middle butterfly
// stages, then the final radix-4 stage on four 32-float strides of `a`.
void OouraFft::cftfsub_128(float* a) const {
  int j, j1, j2, j3, l;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  cft1st_128(a);
  cftmdl_128(a);
  l = 32;
  for (j = 0; j < l; j += 2) {
    j1 = j + l;
    j2 = j1 + l;
    j3 = j2 + l;
    x0r = a[j] + a[j1];
    x0i = a[j + 1] + a[j1 + 1];
    x1r = a[j] - a[j1];
    x1i = a[j + 1] - a[j1 + 1];
    x2r = a[j2] + a[j3];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2] - a[j3];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j] = x0r + x2r;
    a[j + 1] = x0i + x2i;
    a[j2] = x0r - x2r;
    a[j2 + 1] = x0i - x2i;
    a[j1] = x1r - x3i;
    a[j1 + 1] = x1i + x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
  }
}
|
||||
|
||||
// In-place bit-reversal permutation of the 64 complex (128 float) values in
// `a`, specialized for length 128. ip[] holds the base offsets of the four
// element groups; each swap exchanges one (re, im) pair with its
// bit-reversed partner.
void OouraFft::bitrv2_128(float* a) const {
  /*
      Following things have been attempted but are no faster:
      (a) Storing the swap indexes in a LUT (index calculations are done
          for 'free' while waiting on memory/L1).
      (b) Consolidate the load/store of two consecutive floats by a 64 bit
          integer (execution is memory/L1 bound).
      (c) Do a mix of floats and 64 bit integer to maximize register
          utilization (execution is memory/L1 bound).
      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
      (e) Hard-coding of the offsets to completely eliminates index
          calculations.
  */

  unsigned int j, j1, k, k1;
  float xr, xi, yr, yi;

  const int ip[4] = {0, 64, 32, 96};
  for (k = 0; k < 4; k++) {
    // Swap the four pair positions derived from (j, k); the fixed +8/+16/-8
    // offsets walk the remaining bit-reversed partners of this pair.
    for (j = 0; j < k; j++) {
      j1 = 2 * j + ip[k];
      k1 = 2 * k + ip[j];
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 += 16;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 -= 8;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 += 16;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
    }
    // Diagonal case (j == k): one self-paired swap.
    j1 = 2 * k + 8 + ip[k];
    k1 = j1 + 8;
    xr = a[j1 + 0];
    xi = a[j1 + 1];
    yr = a[k1 + 0];
    yi = a[k1 + 1];
    a[j1 + 0] = yr;
    a[j1 + 1] = yi;
    a[k1 + 0] = xr;
    a[k1 + 1] = xi;
  }
}
|
||||
|
||||
} // namespace webrtc
|
64
VocieProcess/common_audio/third_party/ooura/fft_size_128/ooura_fft.h
vendored
Normal file
64
VocieProcess/common_audio/third_party/ooura/fft_size_128/ooura_fft.h
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
/*
 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_

#include "rtc_base/system/arch.h"

namespace webrtc {

// Architecture-specific kernels; each set is only declared when the
// corresponding build flag is defined, and implemented in a separate file.
#if defined(WEBRTC_ARCH_X86_FAMILY)
void cft1st_128_SSE2(float* a);
void cftmdl_128_SSE2(float* a);
void rftfsub_128_SSE2(float* a);
void rftbsub_128_SSE2(float* a);
#endif

#if defined(MIPS_FPU_LE)
void cft1st_128_mips(float* a);
void cftmdl_128_mips(float* a);
void rftfsub_128_mips(float* a);
void rftbsub_128_mips(float* a);
#endif

#if defined(WEBRTC_HAS_NEON)
void cft1st_128_neon(float* a);
void cftmdl_128_neon(float* a);
void rftfsub_128_neon(float* a);
void rftbsub_128_neon(float* a);
#endif

// In-place real FFT of fixed length 128, based on Takuya Ooura's package,
// with the transform buffer passed by the caller (`a` holds 128 floats).
class OouraFft {
 public:
  // Ctor allowing the availability of SSE2 support to be specified.
  explicit OouraFft(bool sse2_available);

  // Deprecated: This Ctor will soon be removed.
  OouraFft();
  ~OouraFft();
  // Forward transform, in place.
  void Fft(float* a) const;
  // Inverse transform, in place.
  void InverseFft(float* a) const;

 private:
  // Per-stage dispatchers that pick the MIPS/NEON/SSE2/C kernel.
  void cft1st_128(float* a) const;
  void cftmdl_128(float* a) const;
  void rftfsub_128(float* a) const;
  void rftbsub_128(float* a) const;

  // Complex-FFT drivers and the bit-reversal permutation.
  void cftfsub_128(float* a) const;
  void cftbsub_128(float* a) const;
  void bitrv2_128(float* a) const;
  // Whether the SSE2 kernels may be used (x86-family builds only).
  bool use_sse2_;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
|
54
VocieProcess/common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_common.h
vendored
Normal file
54
VocieProcess/common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_common.h
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_

#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"

namespace webrtc {

// These tables used to be computed at run-time. For example, refer to:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564
// to see the initialization code.
// Constants shared by all paths (C, SSE2, NEON).
const float rdft_w[64] = {
    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, 0.9238795638f,
    0.3826834559f, 0.3826834559f, 0.9238795638f, 0.9807852507f, 0.1950903237f,
    0.5555702448f, 0.8314695954f, 0.8314695954f, 0.5555702448f, 0.1950903237f,
    0.9807852507f, 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, 0.9569403529f,
    0.2902846634f, 0.4713967443f, 0.8819212914f, 0.7730104327f, 0.6343933344f,
    0.0980171412f, 0.9951847196f, 0.7071067691f, 0.4993977249f, 0.4975923598f,
    0.4945882559f, 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, 0.4157347977f,
    0.4016037583f, 0.3865052164f, 0.3704755902f, 0.3535533845f, 0.3357794881f,
    0.3171966672f, 0.2978496552f, 0.2777851224f, 0.2570513785f, 0.2356983721f,
    0.2137775421f, 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};

// Constants used by the C and MIPS paths.
const float rdft_wk3ri_first[16] = {
    1.000000000f, 0.000000000f, 0.382683456f,  0.923879564f,
    0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
    0.956940353f, 0.290284693f, 0.098017156f,  0.995184720f,
    0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
};
const float rdft_wk3ri_second[16] = {
    -0.707106769f, 0.707106769f,  -0.923879564f, -0.382683456f,
    -0.980785251f, 0.195090353f,  -0.555570245f, -0.831469536f,
    -0.881921172f, 0.471396863f,  -0.773010492f, -0.634393334f,
    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
|
77
VocieProcess/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
vendored
Normal file
77
VocieProcess/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
|
||||
* license.
|
||||
*
|
||||
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
|
||||
* Date: Fri, Jun 24, 2011 at 3:20 AM
|
||||
* Subject: Re: sqrt routine
|
||||
* To: Kevin Ma <kma@google.com>
|
||||
* Hi Kevin,
|
||||
* Thanks for asking. Those routines are public domain (originally posted to
|
||||
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
|
||||
* Cheers,
|
||||
* Wilco
|
||||
*
|
||||
* ----- Original Message -----
|
||||
* From: "Kevin Ma" <kma@google.com>
|
||||
* To: <Wilco.Dijkstra@ntlworld.com>
|
||||
* Sent: Thursday, June 23, 2011 11:44 PM
|
||||
* Subject: Fwd: sqrt routine
|
||||
* Hi Wilco,
|
||||
* I saw your sqrt routine from several web sites, including
|
||||
* http://www.finesse.demon.co.uk/steven/sqrt.html.
|
||||
* Just wonder if there's any copyright information with your Successive
|
||||
* approximation routines, or if I can freely use it for any purpose.
|
||||
* Thanks.
|
||||
* Kevin
|
||||
*/
|
||||
|
||||
// Minor modifications in code style for WebRTC, 2012.
|
||||
|
||||
#include "common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
|
||||
|
||||
/*
|
||||
* Algorithm:
|
||||
* Successive approximation of the equation (root + delta) ^ 2 = N
|
||||
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
|
||||
* Use delta = 2^i for i = 15 .. 0.
|
||||
*
|
||||
* Output precision is 16 bits. Note for large input values (close to
|
||||
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
|
||||
* contains the MSB information (a non-sign value). Do with caution
|
||||
* if you need to cast the output to int16_t type.
|
||||
*
|
||||
* If the input value is negative, it returns 0.
|
||||
*/
|
||||
|
||||
// Computes floor(sqrt(value)) by successive approximation of
// (root + delta)^2 = value with delta = 2^shift for shift = 15 .. 0.
// The sixteen formerly macro-unrolled steps are expressed as a loop; each
// iteration performs the identical candidate test and update.
// Returns 0 for negative input (no candidate ever passes the test).
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
    int32_t root = 0;
    int32_t try1;
    int shift;

    for (shift = 15; shift >= 0; shift--)
    {
        try1 = root + (1 << shift);
        if (value >= try1 << shift)
        {
            value -= try1 << shift;
            root |= 2 << shift;
        }
    }

    /* root accumulated 2 * floor(sqrt(input)); halve it for the result. */
    return root >> 1;
}
|
29
VocieProcess/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h
vendored
Normal file
29
VocieProcess/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <stdint.h>

//
// WebRtcSpl_SqrtFloor(...)
//
// Returns the square root of the input value `value`. The precision of this
// function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
// If `value` is a negative number then 0 is returned.
//
// Algorithm:
//
// An iterative 4 cycle/bit routine
//
// Input:
//      - value     : Value to calculate sqrt of
//
// Return value     : Result of the sqrt calculation
//
int32_t WebRtcSpl_SqrtFloor(int32_t value);
|
@ -0,0 +1,744 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
|
||||
|
||||
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the frequency response of the filter.
|
||||
void ComputeFrequencyResponse(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
|
||||
for (auto& H2_ch : *H2) {
|
||||
H2_ch.fill(0.f);
|
||||
}
|
||||
|
||||
const size_t num_render_channels = H[0].size();
|
||||
RTC_DCHECK_EQ(H.size(), H2->capacity());
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
|
||||
float tmp =
|
||||
H[p][ch].re[j] * H[p][ch].re[j] + H[p][ch].im[j] * H[p][ch].im[j];
|
||||
(*H2)[p][j] = std::max((*H2)[p][j], tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
// Computes and stores the frequency response of the filter.
|
||||
void ComputeFrequencyResponse_Neon(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
|
||||
for (auto& H2_ch : *H2) {
|
||||
H2_ch.fill(0.f);
|
||||
}
|
||||
|
||||
const size_t num_render_channels = H[0].size();
|
||||
RTC_DCHECK_EQ(H.size(), H2->capacity());
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
auto& H2_p = (*H2)[p];
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
for (size_t j = 0; j < kFftLengthBy2; j += 4) {
|
||||
const float32x4_t re = vld1q_f32(&H_p_ch.re[j]);
|
||||
const float32x4_t im = vld1q_f32(&H_p_ch.im[j]);
|
||||
float32x4_t H2_new = vmulq_f32(re, re);
|
||||
H2_new = vmlaq_f32(H2_new, im, im);
|
||||
float32x4_t H2_p_j = vld1q_f32(&H2_p[j]);
|
||||
H2_p_j = vmaxq_f32(H2_p_j, H2_new);
|
||||
vst1q_f32(&H2_p[j], H2_p_j);
|
||||
}
|
||||
float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
|
||||
H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
|
||||
H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Computes and stores the frequency response of the filter (SSE2 variant).
// For every partition and bin, the stored value is the maximum squared
// magnitude across all render channels.
void ComputeFrequencyResponse_Sse2(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
  // Zero the output so that the max operations below accumulate the
  // per-channel peak.
  for (auto& H2_ch : *H2) {
    H2_ch.fill(0.f);
  }

  const size_t num_render_channels = H[0].size();
  RTC_DCHECK_EQ(H.size(), H2->capacity());
  for (size_t p = 0; p < num_partitions; ++p) {
    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
    auto& H2_p = (*H2)[p];
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      const FftData& H_p_ch = H[p][ch];
      // Process four bins at a time. The final (Nyquist) bin is handled
      // separately below, since kFftLengthBy2Plus1 is odd.
      for (size_t j = 0; j < kFftLengthBy2; j += 4) {
        const __m128 re = _mm_loadu_ps(&H_p_ch.re[j]);
        const __m128 re2 = _mm_mul_ps(re, re);
        const __m128 im = _mm_loadu_ps(&H_p_ch.im[j]);
        const __m128 im2 = _mm_mul_ps(im, im);
        const __m128 H2_new = _mm_add_ps(re2, im2);
        __m128 H2_k_j = _mm_loadu_ps(&H2_p[j]);
        H2_k_j = _mm_max_ps(H2_k_j, H2_new);
        _mm_storeu_ps(&H2_p[j], H2_k_j);
      }
      // Scalar handling of the last bin.
      float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
                     H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
      H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
    }
  }
}
#endif
|
||||
|
||||
// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
|
||||
void AdaptPartitions(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
size_t index = render_buffer.Position();
|
||||
const size_t num_render_channels = render_buffer_data[index].size();
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& X_p_ch = render_buffer_data[index][ch];
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
H_p_ch.re[k] += X_p_ch.re[k] * G.re[k] + X_p_ch.im[k] * G.im[k];
|
||||
H_p_ch.im[k] += X_p_ch.re[k] * G.im[k] - X_p_ch.im[k] * G.re[k];
|
||||
}
|
||||
}
|
||||
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
// Adapts the filter partitions. (Neon variant)
|
||||
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
const size_t num_render_channels = render_buffer_data[0].size();
|
||||
const size_t lim1 = std::min(
|
||||
render_buffer_data.size() - render_buffer.Position(), num_partitions);
|
||||
const size_t lim2 = num_partitions;
|
||||
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
|
||||
|
||||
size_t X_partition = render_buffer.Position();
|
||||
size_t limit = lim1;
|
||||
size_t p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
|
||||
const float32x4_t G_re = vld1q_f32(&G.re[k]);
|
||||
const float32x4_t G_im = vld1q_f32(&G.im[k]);
|
||||
const float32x4_t X_re = vld1q_f32(&X.re[k]);
|
||||
const float32x4_t X_im = vld1q_f32(&X.im[k]);
|
||||
const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
|
||||
const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
|
||||
const float32x4_t a = vmulq_f32(X_re, G_re);
|
||||
const float32x4_t e = vmlaq_f32(a, X_im, G_im);
|
||||
const float32x4_t c = vmulq_f32(X_re, G_im);
|
||||
const float32x4_t f = vmlsq_f32(c, X_im, G_re);
|
||||
const float32x4_t g = vaddq_f32(H_re, e);
|
||||
const float32x4_t h = vaddq_f32(H_im, f);
|
||||
vst1q_f32(&H_p_ch.re[k], g);
|
||||
vst1q_f32(&H_p_ch.im[k], h);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
|
||||
X_partition = render_buffer.Position();
|
||||
limit = lim1;
|
||||
p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
|
||||
H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
|
||||
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
|
||||
H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
|
||||
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
|
||||
}
|
||||
}
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
// Adapts the filter partitions. (SSE2 variant)
|
||||
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
const size_t num_render_channels = render_buffer_data[0].size();
|
||||
const size_t lim1 = std::min(
|
||||
render_buffer_data.size() - render_buffer.Position(), num_partitions);
|
||||
const size_t lim2 = num_partitions;
|
||||
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
|
||||
|
||||
size_t X_partition = render_buffer.Position();
|
||||
size_t limit = lim1;
|
||||
size_t p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
|
||||
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
|
||||
const __m128 G_re = _mm_loadu_ps(&G.re[k]);
|
||||
const __m128 G_im = _mm_loadu_ps(&G.im[k]);
|
||||
const __m128 X_re = _mm_loadu_ps(&X.re[k]);
|
||||
const __m128 X_im = _mm_loadu_ps(&X.im[k]);
|
||||
const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
|
||||
const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
|
||||
const __m128 a = _mm_mul_ps(X_re, G_re);
|
||||
const __m128 b = _mm_mul_ps(X_im, G_im);
|
||||
const __m128 c = _mm_mul_ps(X_re, G_im);
|
||||
const __m128 d = _mm_mul_ps(X_im, G_re);
|
||||
const __m128 e = _mm_add_ps(a, b);
|
||||
const __m128 f = _mm_sub_ps(c, d);
|
||||
const __m128 g = _mm_add_ps(H_re, e);
|
||||
const __m128 h = _mm_add_ps(H_im, f);
|
||||
_mm_storeu_ps(&H_p_ch.re[k], g);
|
||||
_mm_storeu_ps(&H_p_ch.im[k], h);
|
||||
}
|
||||
}
|
||||
}
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
|
||||
X_partition = render_buffer.Position();
|
||||
limit = lim1;
|
||||
p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
|
||||
H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
|
||||
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
|
||||
H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
|
||||
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
|
||||
}
|
||||
}
|
||||
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Produces the filter output.
|
||||
void ApplyFilter(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S) {
|
||||
S->re.fill(0.f);
|
||||
S->im.fill(0.f);
|
||||
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
size_t index = render_buffer.Position();
|
||||
const size_t num_render_channels = render_buffer_data[index].size();
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(num_render_channels, H[p].size());
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& X_p_ch = render_buffer_data[index][ch];
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
S->re[k] += X_p_ch.re[k] * H_p_ch.re[k] - X_p_ch.im[k] * H_p_ch.im[k];
|
||||
S->im[k] += X_p_ch.re[k] * H_p_ch.im[k] + X_p_ch.im[k] * H_p_ch.re[k];
|
||||
}
|
||||
}
|
||||
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
// Produces the filter output (Neon variant).
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S) {
  // The filter must hold at least as many partitions as are applied.
  // (Replaces the previous check `H.size() >= H.size() - 1`, which is
  // always true and therefore verified nothing.)
  RTC_DCHECK_GE(H.size(), num_partitions);
  S->Clear();

  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // The circular render buffer is traversed in at most two contiguous runs:
  // [Position(), buffer end) and then [0, ...), totalling num_partitions.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;

  size_t X_partition = render_buffer.Position();
  size_t p = 0;
  size_t limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        // Four bins at a time: S += X * H (complex multiply-accumulate).
        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
          const float32x4_t X_re = vld1q_f32(&X.re[k]);
          const float32x4_t X_im = vld1q_f32(&X.im[k]);
          const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
          const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
          const float32x4_t S_re = vld1q_f32(&S->re[k]);
          const float32x4_t S_im = vld1q_f32(&S->im[k]);
          const float32x4_t a = vmulq_f32(X_re, H_re);
          const float32x4_t e = vmlsq_f32(a, X_im, H_im);
          const float32x4_t c = vmulq_f32(X_re, H_im);
          const float32x4_t f = vmlaq_f32(c, X_im, H_re);
          const float32x4_t g = vaddq_f32(S_re, e);
          const float32x4_t h = vaddq_f32(S_im, f);
          vst1q_f32(&S->re[k], g);
          vst1q_f32(&S->im[k], h);
        }
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);

  // Handle the last (Nyquist) bin separately, since kFftLengthBy2Plus1 is
  // odd and the vectorized loop above only covers the first kFftLengthBy2
  // bins.
  X_partition = render_buffer.Position();
  p = 0;
  limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);
}
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Produces the filter output (SSE2 variant).
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S) {
  // The filter must hold at least as many partitions as are applied.
  // (Replaces the previous check `H.size() >= H.size() - 1`, which is
  // always true and therefore verified nothing.)
  RTC_DCHECK_GE(H.size(), num_partitions);
  S->re.fill(0.f);
  S->im.fill(0.f);

  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // The circular render buffer is traversed in at most two contiguous runs:
  // [Position(), buffer end) and then [0, ...), totalling num_partitions.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;

  size_t X_partition = render_buffer.Position();
  size_t p = 0;
  size_t limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        // Four bins at a time: S += X * H (complex multiply-accumulate).
        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
          const __m128 X_re = _mm_loadu_ps(&X.re[k]);
          const __m128 X_im = _mm_loadu_ps(&X.im[k]);
          const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
          const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
          const __m128 S_re = _mm_loadu_ps(&S->re[k]);
          const __m128 S_im = _mm_loadu_ps(&S->im[k]);
          const __m128 a = _mm_mul_ps(X_re, H_re);
          const __m128 b = _mm_mul_ps(X_im, H_im);
          const __m128 c = _mm_mul_ps(X_re, H_im);
          const __m128 d = _mm_mul_ps(X_im, H_re);
          const __m128 e = _mm_sub_ps(a, b);
          const __m128 f = _mm_add_ps(c, d);
          const __m128 g = _mm_add_ps(S_re, e);
          const __m128 h = _mm_add_ps(S_im, f);
          _mm_storeu_ps(&S->re[k], g);
          _mm_storeu_ps(&S->im[k], h);
        }
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);

  // Handle the last (Nyquist) bin separately, since kFftLengthBy2Plus1 is
  // odd and the vectorized loop above only covers the first kFftLengthBy2
  // bins.
  X_partition = render_buffer.Position();
  p = 0;
  limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);
}
#endif
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
namespace {
|
||||
|
||||
// Ensures that the newly added filter partitions after a size increase are set
|
||||
// to zero.
|
||||
void ZeroFilter(size_t old_size,
|
||||
size_t new_size,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
RTC_DCHECK_GE(H->size(), old_size);
|
||||
RTC_DCHECK_GE(H->size(), new_size);
|
||||
|
||||
for (size_t p = old_size; p < new_size; ++p) {
|
||||
RTC_DCHECK_EQ((*H)[p].size(), (*H)[0].size());
|
||||
for (size_t ch = 0; ch < (*H)[0].size(); ++ch) {
|
||||
(*H)[p][ch].Clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
AdaptiveFirFilter::AdaptiveFirFilter(size_t max_size_partitions,
|
||||
size_t initial_size_partitions,
|
||||
size_t size_change_duration_blocks,
|
||||
size_t num_render_channels,
|
||||
Aec3Optimization optimization,
|
||||
ApmDataDumper* data_dumper)
|
||||
: data_dumper_(data_dumper),
|
||||
fft_(),
|
||||
optimization_(optimization),
|
||||
num_render_channels_(num_render_channels),
|
||||
max_size_partitions_(max_size_partitions),
|
||||
size_change_duration_blocks_(
|
||||
static_cast<int>(size_change_duration_blocks)),
|
||||
current_size_partitions_(initial_size_partitions),
|
||||
target_size_partitions_(initial_size_partitions),
|
||||
old_target_size_partitions_(initial_size_partitions),
|
||||
H_(max_size_partitions_, std::vector<FftData>(num_render_channels_)) {
|
||||
RTC_DCHECK(data_dumper_);
|
||||
RTC_DCHECK_GE(max_size_partitions, initial_size_partitions);
|
||||
|
||||
RTC_DCHECK_LT(0, size_change_duration_blocks_);
|
||||
one_by_size_change_duration_blocks_ = 1.f / size_change_duration_blocks_;
|
||||
|
||||
ZeroFilter(0, max_size_partitions_, &H_);
|
||||
|
||||
SetSizePartitions(current_size_partitions_, true);
|
||||
}
|
||||
|
||||
AdaptiveFirFilter::~AdaptiveFirFilter() = default;
|
||||
|
||||
void AdaptiveFirFilter::HandleEchoPathChange() {
|
||||
// TODO(peah): Check the value and purpose of the code below.
|
||||
ZeroFilter(current_size_partitions_, max_size_partitions_, &H_);
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::SetSizePartitions(size_t size, bool immediate_effect) {
|
||||
RTC_DCHECK_EQ(max_size_partitions_, H_.capacity());
|
||||
RTC_DCHECK_LE(size, max_size_partitions_);
|
||||
|
||||
target_size_partitions_ = std::min(max_size_partitions_, size);
|
||||
if (immediate_effect) {
|
||||
size_t old_size_partitions_ = current_size_partitions_;
|
||||
current_size_partitions_ = old_target_size_partitions_ =
|
||||
target_size_partitions_;
|
||||
ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
|
||||
|
||||
partition_to_constrain_ =
|
||||
std::min(partition_to_constrain_, current_size_partitions_ - 1);
|
||||
size_change_counter_ = 0;
|
||||
} else {
|
||||
size_change_counter_ = size_change_duration_blocks_;
|
||||
}
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::UpdateSize() {
|
||||
RTC_DCHECK_GE(size_change_duration_blocks_, size_change_counter_);
|
||||
size_t old_size_partitions_ = current_size_partitions_;
|
||||
if (size_change_counter_ > 0) {
|
||||
--size_change_counter_;
|
||||
|
||||
auto average = [](float from, float to, float from_weight) {
|
||||
return from * from_weight + to * (1.f - from_weight);
|
||||
};
|
||||
|
||||
float change_factor =
|
||||
size_change_counter_ * one_by_size_change_duration_blocks_;
|
||||
|
||||
current_size_partitions_ = average(old_target_size_partitions_,
|
||||
target_size_partitions_, change_factor);
|
||||
|
||||
partition_to_constrain_ =
|
||||
std::min(partition_to_constrain_, current_size_partitions_ - 1);
|
||||
} else {
|
||||
current_size_partitions_ = old_target_size_partitions_ =
|
||||
target_size_partitions_;
|
||||
}
|
||||
ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
|
||||
RTC_DCHECK_LE(0, size_change_counter_);
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
|
||||
FftData* S) const {
|
||||
RTC_DCHECK(S);
|
||||
switch (optimization_) {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
case Aec3Optimization::kSse2:
|
||||
aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S);
|
||||
break;
|
||||
case Aec3Optimization::kAvx2:
|
||||
aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
|
||||
break;
|
||||
#endif
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
case Aec3Optimization::kNeon:
|
||||
aec3::ApplyFilter_Neon(render_buffer, current_size_partitions_, H_, S);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
aec3::ApplyFilter(render_buffer, current_size_partitions_, H_, S);
|
||||
}
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
|
||||
const FftData& G) {
|
||||
// Adapt the filter and update the filter size.
|
||||
AdaptAndUpdateSize(render_buffer, G);
|
||||
|
||||
// Constrain the filter partitions in a cyclic manner.
|
||||
Constrain();
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
std::vector<float>* impulse_response) {
|
||||
// Adapt the filter and update the filter size.
|
||||
AdaptAndUpdateSize(render_buffer, G);
|
||||
|
||||
// Constrain the filter partitions in a cyclic manner.
|
||||
ConstrainAndUpdateImpulseResponse(impulse_response);
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::ComputeFrequencyResponse(
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const {
|
||||
RTC_DCHECK_GE(max_size_partitions_, H2->capacity());
|
||||
|
||||
H2->resize(current_size_partitions_);
|
||||
|
||||
switch (optimization_) {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
case Aec3Optimization::kSse2:
|
||||
aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2);
|
||||
break;
|
||||
case Aec3Optimization::kAvx2:
|
||||
aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
|
||||
break;
|
||||
#endif
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
case Aec3Optimization::kNeon:
|
||||
aec3::ComputeFrequencyResponse_Neon(current_size_partitions_, H_, H2);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
aec3::ComputeFrequencyResponse(current_size_partitions_, H_, H2);
|
||||
}
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer,
|
||||
const FftData& G) {
|
||||
// Update the filter size if needed.
|
||||
UpdateSize();
|
||||
|
||||
// Adapt the filter.
|
||||
switch (optimization_) {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
case Aec3Optimization::kSse2:
|
||||
aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_,
|
||||
&H_);
|
||||
break;
|
||||
case Aec3Optimization::kAvx2:
|
||||
aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
|
||||
&H_);
|
||||
break;
|
||||
#endif
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
case Aec3Optimization::kNeon:
|
||||
aec3::AdaptPartitions_Neon(render_buffer, G, current_size_partitions_,
|
||||
&H_);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
aec3::AdaptPartitions(render_buffer, G, current_size_partitions_, &H_);
|
||||
}
|
||||
}
|
||||
|
||||
// Constrains the partition of the frequency domain filter to be limited in
|
||||
// time via setting the relevant time-domain coefficients to zero and updates
|
||||
// the corresponding values in an externally stored impulse response estimate.
|
||||
void AdaptiveFirFilter::ConstrainAndUpdateImpulseResponse(
|
||||
std::vector<float>* impulse_response) {
|
||||
RTC_DCHECK_EQ(GetTimeDomainLength(max_size_partitions_),
|
||||
impulse_response->capacity());
|
||||
impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
|
||||
std::array<float, kFftLength> h;
|
||||
impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
|
||||
std::fill(
|
||||
impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2,
|
||||
impulse_response->begin() + (partition_to_constrain_ + 1) * kFftLengthBy2,
|
||||
0.f);
|
||||
|
||||
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
|
||||
fft_.Ifft(H_[partition_to_constrain_][ch], &h);
|
||||
|
||||
static constexpr float kScale = 1.0f / kFftLengthBy2;
|
||||
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
|
||||
[](float& a) { a *= kScale; });
|
||||
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
|
||||
|
||||
if (ch == 0) {
|
||||
std::copy(
|
||||
h.begin(), h.begin() + kFftLengthBy2,
|
||||
impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2);
|
||||
} else {
|
||||
for (size_t k = 0, j = partition_to_constrain_ * kFftLengthBy2;
|
||||
k < kFftLengthBy2; ++k, ++j) {
|
||||
if (fabsf((*impulse_response)[j]) < fabsf(h[k])) {
|
||||
(*impulse_response)[j] = h[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
|
||||
}
|
||||
|
||||
partition_to_constrain_ =
|
||||
partition_to_constrain_ < (current_size_partitions_ - 1)
|
||||
? partition_to_constrain_ + 1
|
||||
: 0;
|
||||
}
|
||||
|
||||
// Constrains the a partiton of the frequency domain filter to be limited in
|
||||
// time via setting the relevant time-domain coefficients to zero.
|
||||
void AdaptiveFirFilter::Constrain() {
|
||||
std::array<float, kFftLength> h;
|
||||
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
|
||||
fft_.Ifft(H_[partition_to_constrain_][ch], &h);
|
||||
|
||||
static constexpr float kScale = 1.0f / kFftLengthBy2;
|
||||
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
|
||||
[](float& a) { a *= kScale; });
|
||||
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
|
||||
|
||||
fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
|
||||
}
|
||||
|
||||
partition_to_constrain_ =
|
||||
partition_to_constrain_ < (current_size_partitions_ - 1)
|
||||
? partition_to_constrain_ + 1
|
||||
: 0;
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::ScaleFilter(float factor) {
|
||||
for (auto& H_p : H_) {
|
||||
for (auto& H_p_ch : H_p) {
|
||||
for (auto& re : H_p_ch.re) {
|
||||
re *= factor;
|
||||
}
|
||||
for (auto& im : H_p_ch.im) {
|
||||
im *= factor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set the filter coefficients.
|
||||
void AdaptiveFirFilter::SetFilter(size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H) {
|
||||
const size_t min_num_partitions =
|
||||
std::min(current_size_partitions_, num_partitions);
|
||||
for (size_t p = 0; p < min_num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(H_[p].size(), H[p].size());
|
||||
RTC_DCHECK_EQ(num_render_channels_, H_[p].size());
|
||||
|
||||
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
|
||||
std::copy(H[p][ch].re.begin(), H[p][ch].re.end(), H_[p][ch].re.begin());
|
||||
std::copy(H[p][ch].im.begin(), H[p][ch].im.end(), H_[p][ch].im.begin());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
192
VocieProcess/modules/audio_processing/aec3/adaptive_fir_filter.h
Normal file
192
VocieProcess/modules/audio_processing/aec3/adaptive_fir_filter.h
Normal file
@ -0,0 +1,192 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
// Computes and stores the frequency response of the filter.
// `H` holds the filter partitions (outer index: partition); the resulting
// per-partition power spectra are written to `H2`.
void ComputeFrequencyResponse(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
// Architecture-specific variants of ComputeFrequencyResponse; which one runs
// is chosen at runtime from the detected Aec3Optimization.
#if defined(WEBRTC_HAS_NEON)
void ComputeFrequencyResponse_Neon(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ComputeFrequencyResponse_Sse2(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);

void ComputeFrequencyResponse_Avx2(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#endif

// Adapts the filter partitions.
void AdaptPartitions(const RenderBuffer& render_buffer,
                     const FftData& G,
                     size_t num_partitions,
                     std::vector<std::vector<FftData>>* H);
// Architecture-specific variants of AdaptPartitions.
#if defined(WEBRTC_HAS_NEON)
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
                          const FftData& G,
                          size_t num_partitions,
                          std::vector<std::vector<FftData>>* H);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
                          const FftData& G,
                          size_t num_partitions,
                          std::vector<std::vector<FftData>>* H);

void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
                          const FftData& G,
                          size_t num_partitions,
                          std::vector<std::vector<FftData>>* H);
#endif

// Produces the filter output.
// The output spectrum is accumulated into `S` from the partitions in `H` and
// the render data in `render_buffer`.
void ApplyFilter(const RenderBuffer& render_buffer,
                 size_t num_partitions,
                 const std::vector<std::vector<FftData>>& H,
                 FftData* S);
// Architecture-specific variants of ApplyFilter.
#if defined(WEBRTC_HAS_NEON)
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S);

void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S);
#endif

}  // namespace aec3
|
||||
|
||||
// Provides a frequency domain adaptive filter functionality.
// The filter consists of a number of partitions (each an FFT-domain block);
// the partition count can grow/shrink gradually via SetSizePartitions.
class AdaptiveFirFilter {
 public:
  AdaptiveFirFilter(size_t max_size_partitions,
                    size_t initial_size_partitions,
                    size_t size_change_duration_blocks,
                    size_t num_render_channels,
                    Aec3Optimization optimization,
                    ApmDataDumper* data_dumper);

  ~AdaptiveFirFilter();

  AdaptiveFirFilter(const AdaptiveFirFilter&) = delete;
  AdaptiveFirFilter& operator=(const AdaptiveFirFilter&) = delete;

  // Produces the output of the filter.
  void Filter(const RenderBuffer& render_buffer, FftData* S) const;

  // Adapts the filter and updates an externally stored impulse response
  // estimate.
  void Adapt(const RenderBuffer& render_buffer,
             const FftData& G,
             std::vector<float>* impulse_response);

  // Adapts the filter.
  void Adapt(const RenderBuffer& render_buffer, const FftData& G);

  // Receives reports that known echo path changes have occured and adjusts
  // the filter adaptation accordingly.
  void HandleEchoPathChange();

  // Returns the filter size.
  size_t SizePartitions() const { return current_size_partitions_; }

  // Sets the filter size.
  void SetSizePartitions(size_t size, bool immediate_effect);

  // Computes the frequency responses for the filter partitions.
  void ComputeFrequencyResponse(
      std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const;

  // Returns the maximum number of partitions for the filter.
  size_t max_filter_size_partitions() const { return max_size_partitions_; }

  // Dumps the real/imaginary parts of channel 0 of every partition under the
  // supplied name.
  void DumpFilter(absl::string_view name_frequency_domain) {
    for (size_t p = 0; p < max_size_partitions_; ++p) {
      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].re);
      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].im);
    }
  }

  // Scale the filter impulse response and spectrum by a factor.
  void ScaleFilter(float factor);

  // Set the filter coefficients.
  void SetFilter(size_t num_partitions,
                 const std::vector<std::vector<FftData>>& H);

  // Gets the filter coefficients.
  const std::vector<std::vector<FftData>>& GetFilter() const { return H_; }

 private:
  // Adapts the filter and updates the filter size.
  void AdaptAndUpdateSize(const RenderBuffer& render_buffer, const FftData& G);

  // Constrain the filter partitions in a cyclic manner.
  void Constrain();
  // Constrains the filter in a cyclic manner and updates the corresponding
  // values in the supplied impulse response.
  void ConstrainAndUpdateImpulseResponse(std::vector<float>* impulse_response);

  // Gradually Updates the current filter size towards the target size.
  void UpdateSize();

  ApmDataDumper* const data_dumper_;
  const Aec3Fft fft_;
  const Aec3Optimization optimization_;
  const size_t num_render_channels_;
  const size_t max_size_partitions_;
  const int size_change_duration_blocks_;
  float one_by_size_change_duration_blocks_;
  size_t current_size_partitions_;
  size_t target_size_partitions_;
  size_t old_target_size_partitions_;
  int size_change_counter_ = 0;
  // Filter coefficients, indexed [partition][render channel].
  std::vector<std::vector<FftData>> H_;
  // Next partition to be constrained (cyclic).
  size_t partition_to_constrain_ = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
|
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the echo return loss estimate of the filter, which is the
|
||||
// sum of the partition frequency responses.
|
||||
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl) {
|
||||
std::fill(erl.begin(), erl.end(), 0.f);
|
||||
for (auto& H2_j : H2) {
|
||||
std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
|
||||
std::plus<float>());
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
// NEON variant of ErlComputer: sums the partition frequency responses four
// bins at a time; the final (65th) bin is handled in scalar code.
void ErlComputer_NEON(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (const auto& H2_j : H2) {
    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
      float32x4_t sum = vld1q_f32(&erl[k]);
      sum = vaddq_f32(sum, vld1q_f32(&H2_j[k]));
      vst1q_f32(&erl[k], sum);
    }
    // kFftLengthBy2Plus1 is odd, so the last bin falls outside the
    // 4-wide vector loop.
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
// SSE2 variant of ErlComputer: sums the partition frequency responses four
// bins at a time; the final (65th) bin is handled in scalar code.
void ErlComputer_SSE2(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (const auto& H2_j : H2) {
    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
      __m128 sum = _mm_loadu_ps(&erl[k]);
      sum = _mm_add_ps(sum, _mm_loadu_ps(&H2_j[k]));
      _mm_storeu_ps(&erl[k], sum);
    }
    // kFftLengthBy2Plus1 is odd, so the last bin falls outside the
    // 4-wide vector loop.
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
#endif
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
void ComputeErl(const Aec3Optimization& optimization,
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
|
||||
// Update the frequency response and echo return loss for the filter.
|
||||
switch (optimization) {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
case Aec3Optimization::kSse2:
|
||||
aec3::ErlComputer_SSE2(H2, erl);
|
||||
break;
|
||||
case Aec3Optimization::kAvx2:
|
||||
aec3::ErlComputer_AVX2(H2, erl);
|
||||
break;
|
||||
#endif
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
case Aec3Optimization::kNeon:
|
||||
aec3::ErlComputer_NEON(H2, erl);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
aec3::ErlComputer(H2, erl);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the echo return loss estimate of the filter, which is the
|
||||
// sum of the partition frequency responses.
|
||||
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
void ErlComputer_NEON(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
void ErlComputer_SSE2(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
|
||||
void ErlComputer_AVX2(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
#endif
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
// Computes the echo return loss based on a frequency response.
|
||||
void ComputeErl(const Aec3Optimization& optimization,
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
|
58
VocieProcess/modules/audio_processing/aec3/aec3_common.cc
Normal file
58
VocieProcess/modules/audio_processing/aec3/aec3_common.cc
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"

#include <stdint.h>

#include <cstring>

#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Detects what kind of optimizations to use for the code.
Aec3Optimization DetectOptimization() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  // Prefer the widest vector extension the CPU reports.
  if (GetCPUInfo(kAVX2) != 0)
    return Aec3Optimization::kAvx2;
  if (GetCPUInfo(kSSE2) != 0)
    return Aec3Optimization::kSse2;
#endif

#if defined(WEBRTC_HAS_NEON)
  return Aec3Optimization::kNeon;
#else
  return Aec3Optimization::kNone;
#endif
}
|
||||
|
||||
// Computes the log2 of the input in a fast and approximate manner.
//
// The float is reinterpreted as its IEEE-754 bit pattern: scaling the raw
// bits by 1/2^23 "right shifts" the exponent into place, and subtracting a
// constant removes the exponent bias
// (https://en.wikipedia.org/wiki/Exponent_bias). The bias constant
// 126.942695f (rather than exactly 127) also centers the approximation error.
//
// The previous implementation reinterpreted the bits through a union, which
// is undefined behavior in C++ (type punning through a non-active union
// member); std::memcpy expresses the same bit copy with well-defined
// semantics and compiles to identical code on mainstream compilers.
float FastApproxLog2f(const float in) {
  RTC_DCHECK_GT(in, .0f);
  static_assert(sizeof(float) == sizeof(uint32_t),
                "Bit reinterpretation requires matching sizes");
  uint32_t bit_pattern;
  std::memcpy(&bit_pattern, &in, sizeof(in));
  float out = bit_pattern;
  out *= 1.1920929e-7f;  // 1/2^23
  out -= 126.942695f;    // Remove bias.
  return out;
}
|
||||
|
||||
// Returns dB from a power quantity expressed in log2.
float Log2TodB(const float in_log2) {
  // 10 * log10(2): converts base-2 log units of a power quantity to decibels.
  constexpr double kDbPerLog2 = 3.0102999566398121;
  return kDbPerLog2 * in_log2;
}
|
||||
|
||||
} // namespace webrtc
|
114
VocieProcess/modules/audio_processing/aec3/aec3_common.h
Normal file
114
VocieProcess/modules/audio_processing/aec3/aec3_common.h
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
#ifdef _MSC_VER /* visual c++ */
|
||||
#define ALIGN16_BEG __declspec(align(16))
|
||||
#define ALIGN16_END
|
||||
#else /* gcc or icc */
|
||||
#define ALIGN16_BEG
|
||||
#define ALIGN16_END __attribute__((aligned(16)))
|
||||
#endif
|
||||
|
||||
enum class Aec3Optimization { kNone, kSse2, kAvx2, kNeon };
|
||||
|
||||
constexpr int kNumBlocksPerSecond = 250;
|
||||
|
||||
constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
|
||||
constexpr int kMetricsComputationBlocks = 3;
|
||||
constexpr int kMetricsCollectionBlocks =
|
||||
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
|
||||
|
||||
constexpr size_t kFftLengthBy2 = 64;
|
||||
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
|
||||
constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
|
||||
constexpr size_t kFftLength = 2 * kFftLengthBy2;
|
||||
constexpr size_t kFftLengthBy2Log2 = 6;
|
||||
|
||||
constexpr int kRenderTransferQueueSizeFrames = 100;
|
||||
|
||||
constexpr size_t kMaxNumBands = 3;
|
||||
constexpr size_t kFrameSize = 160;
|
||||
constexpr size_t kSubFrameLength = kFrameSize / 2;
|
||||
|
||||
constexpr size_t kBlockSize = kFftLengthBy2;
|
||||
constexpr size_t kBlockSizeLog2 = kFftLengthBy2Log2;
|
||||
|
||||
constexpr size_t kExtendedBlockSize = 2 * kFftLengthBy2;
|
||||
constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32;
|
||||
constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks =
|
||||
kMatchedFilterWindowSizeSubBlocks * 3 / 4;
|
||||
|
||||
// TODO(peah): Integrate this with how it is done inside audio_processing_impl.
// Maps a fullband sample rate to the number of 16 kHz-wide bands it spans.
constexpr size_t NumBandsForRate(int sample_rate_hz) {
  return static_cast<size_t>(sample_rate_hz / 16000);
}

// Returns true for the fullband sample rates supported by this code.
constexpr bool ValidFullBandRate(int sample_rate_hz) {
  switch (sample_rate_hz) {
    case 16000:
    case 32000:
    case 48000:
      return true;
    default:
      return false;
  }
}
||||
|
||||
constexpr int GetTimeDomainLength(int filter_length_blocks) {
|
||||
return filter_length_blocks * kFftLengthBy2;
|
||||
}
|
||||
|
||||
constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor,
|
||||
size_t num_matched_filters) {
|
||||
return kBlockSize / down_sampling_factor *
|
||||
(kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters +
|
||||
kMatchedFilterWindowSizeSubBlocks + 1);
|
||||
}
|
||||
|
||||
constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor,
|
||||
size_t num_matched_filters,
|
||||
size_t filter_length_blocks) {
|
||||
return GetDownSampledBufferSize(down_sampling_factor, num_matched_filters) /
|
||||
(kBlockSize / down_sampling_factor) +
|
||||
filter_length_blocks + 1;
|
||||
}
|
||||
|
||||
// Detects what kind of optimizations to use for the code.
|
||||
Aec3Optimization DetectOptimization();
|
||||
|
||||
// Computes the log2 of the input in a fast an approximate manner.
|
||||
float FastApproxLog2f(float in);
|
||||
|
||||
// Returns dB from a power quantity expressed in log2.
|
||||
float Log2TodB(float in_log2);
|
||||
|
||||
static_assert(1 << kBlockSizeLog2 == kBlockSize,
|
||||
"Proper number of shifts for blocksize");
|
||||
|
||||
static_assert(1 << kFftLengthBy2Log2 == kFftLengthBy2,
|
||||
"Proper number of shifts for the fft length");
|
||||
|
||||
static_assert(1 == NumBandsForRate(16000), "Number of bands for 16 kHz");
|
||||
static_assert(2 == NumBandsForRate(32000), "Number of bands for 32 kHz");
|
||||
static_assert(3 == NumBandsForRate(48000), "Number of bands for 48 kHz");
|
||||
|
||||
static_assert(ValidFullBandRate(16000),
|
||||
"Test that 16 kHz is a valid sample rate");
|
||||
static_assert(ValidFullBandRate(32000),
|
||||
"Test that 32 kHz is a valid sample rate");
|
||||
static_assert(ValidFullBandRate(48000),
|
||||
"Test that 48 kHz is a valid sample rate");
|
||||
static_assert(!ValidFullBandRate(8001),
|
||||
"Test that 8001 Hz is not a valid sample rate");
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
|
144
VocieProcess/modules/audio_processing/aec3/aec3_fft.cc
Normal file
144
VocieProcess/modules/audio_processing/aec3/aec3_fft.cc
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/cpu_features_wrapper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
const float kHanning64[kFftLengthBy2] = {
|
||||
0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f,
|
||||
0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
|
||||
0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
|
||||
0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
|
||||
0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f,
|
||||
0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
|
||||
0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
|
||||
0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
|
||||
0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f,
|
||||
0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
|
||||
0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
|
||||
0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
|
||||
0.0222136f, 0.00991376f, 0.00248461f, 0.f};
|
||||
|
||||
// Hanning window from Matlab command win = sqrt(hanning(128)).
|
||||
const float kSqrtHanning128[kFftLength] = {
|
||||
0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
|
||||
0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
|
||||
0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
|
||||
0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
|
||||
0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
|
||||
0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
|
||||
0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
|
||||
0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
|
||||
0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
|
||||
0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
|
||||
0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
|
||||
0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
|
||||
0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
|
||||
0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
|
||||
0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
|
||||
0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
|
||||
1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f,
|
||||
0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f,
|
||||
0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f,
|
||||
0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f,
|
||||
0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f,
|
||||
0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f,
|
||||
0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f,
|
||||
0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f,
|
||||
0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f,
|
||||
0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f,
|
||||
0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f,
|
||||
0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f,
|
||||
0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f,
|
||||
0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f,
|
||||
0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f,
|
||||
0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f};
|
||||
|
||||
// Returns whether the running CPU supports SSE2; always false off x86.
bool IsSse2Available() {
#if !defined(WEBRTC_ARCH_X86_FAMILY)
  return false;
#else
  return GetCPUInfo(kSSE2) != 0;
#endif
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Aec3Fft::Aec3Fft() : ooura_fft_(IsSse2Available()) {}
|
||||
|
||||
// TODO(peah): Change x to be std::array once the rest of the code allows this.
|
||||
void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
|
||||
Window window,
|
||||
FftData* X) const {
|
||||
RTC_DCHECK(X);
|
||||
RTC_DCHECK_EQ(kFftLengthBy2, x.size());
|
||||
std::array<float, kFftLength> fft;
|
||||
std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f);
|
||||
switch (window) {
|
||||
case Window::kRectangular:
|
||||
std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
|
||||
break;
|
||||
case Window::kHanning:
|
||||
std::transform(x.begin(), x.end(), std::begin(kHanning64),
|
||||
fft.begin() + kFftLengthBy2,
|
||||
[](float a, float b) { return a * b; });
|
||||
break;
|
||||
case Window::kSqrtHanning:
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
break;
|
||||
default:
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
Fft(&fft, X);
|
||||
}
|
||||
|
||||
// Concatenates x_old and x (each kFftLengthBy2 samples long), optionally
// applies a sqrt-Hanning window across the concatenation, and computes the
// FFT of the result.
void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x,
                        rtc::ArrayView<const float> x_old,
                        Window window,
                        FftData* X) const {
  RTC_DCHECK(X);
  RTC_DCHECK_EQ(kFftLengthBy2, x.size());
  RTC_DCHECK_EQ(kFftLengthBy2, x_old.size());
  std::array<float, kFftLength> fft;
  const size_t old_size = x_old.size();

  switch (window) {
    case Window::kRectangular:
      // [x_old | x] with no windowing.
      std::copy(x_old.begin(), x_old.end(), fft.begin());
      std::copy(x.begin(), x.end(), fft.begin() + old_size);
      break;
    case Window::kHanning:
      RTC_DCHECK_NOTREACHED();
      break;
    case Window::kSqrtHanning:
      // First half of the window multiplies x_old, second half multiplies x.
      std::transform(x_old.begin(), x_old.end(), std::begin(kSqrtHanning128),
                     fft.begin(), std::multiplies<float>());
      std::transform(x.begin(), x.end(),
                     std::begin(kSqrtHanning128) + old_size,
                     fft.begin() + old_size, std::multiplies<float>());
      break;
    default:
      RTC_DCHECK_NOTREACHED();
  }

  Fft(&fft, X);
}
|
||||
|
||||
} // namespace webrtc
|
75
VocieProcess/modules/audio_processing/aec3/aec3_fft.h
Normal file
75
VocieProcess/modules/audio_processing/aec3/aec3_fft.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Wrapper class that provides 128 point real valued FFT functionality with the
// FftData type.
class Aec3Fft {
 public:
  // Time-domain windows applied before the transform.
  enum class Window { kRectangular, kHanning, kSqrtHanning };

  Aec3Fft();

  Aec3Fft(const Aec3Fft&) = delete;
  Aec3Fft& operator=(const Aec3Fft&) = delete;

  // Computes the FFT. Note that both the input and output are modified.
  void Fft(std::array<float, kFftLength>* x, FftData* X) const {
    RTC_DCHECK(x);
    RTC_DCHECK(X);
    // The ooura FFT operates in place on the packed array, which is then
    // unpacked into the FftData representation.
    ooura_fft_.Fft(x->data());
    X->CopyFromPackedArray(*x);
  }
  // Computes the inverse Fft.
  void Ifft(const FftData& X, std::array<float, kFftLength>* x) const {
    RTC_DCHECK(x);
    X.CopyToPackedArray(x);
    ooura_fft_.InverseFft(x->data());
  }

  // Windows the input using a Hanning window, and then adds padding of
  // kFftLengthBy2 initial zeros before computing the Fft.
  void ZeroPaddedFft(rtc::ArrayView<const float> x,
                     Window window,
                     FftData* X) const;

  // Concatenates the kFftLengthBy2 values long x and x_old before computing the
  // Fft. After that, x is copied to x_old.
  void PaddedFft(rtc::ArrayView<const float> x,
                 rtc::ArrayView<const float> x_old,
                 FftData* X) const {
    PaddedFft(x, x_old, Window::kRectangular, X);
  }

  // Padded Fft using a time-domain window.
  void PaddedFft(rtc::ArrayView<const float> x,
                 rtc::ArrayView<const float> x_old,
                 Window window,
                 FftData* X) const;

 private:
  const OouraFft ooura_fft_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
|
481
VocieProcess/modules/audio_processing/aec3/aec_state.cc
Normal file
481
VocieProcess/modules/audio_processing/aec3/aec_state.cc
Normal file
@ -0,0 +1,481 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Killswitch field trial: when enabled, the initial state is not reset at
// echo path changes.
bool DeactivateInitialStateResetAtEchoPathChange() {
  return field_trial::IsEnabled(
      "WebRTC-Aec3DeactivateInitialStateResetKillSwitch");
}

// Killswitch field trial: full reset at echo path changes is on unless the
// trial is enabled.
bool FullResetAtEchoPathChange() {
  return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch");
}

// Killswitch field trial: subtractor-analyzer reset at echo path changes is
// on unless the trial is enabled.
bool SubtractorAnalyzerResetAtEchoPathChange() {
  return !field_trial::IsEnabled(
      "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch");
}
|
||||
|
||||
// Computes the reverb-extended render power spectrum at the estimated delay:
// updates `reverb_model` with the spectrum one block past the delay, then
// writes (delayed spectrum + modeled reverb) into `reverb_power_spectrum`.
// For multichannel render, spectra are averaged across channels first.
void ComputeAvgRenderReverb(
    const SpectrumBuffer& spectrum_buffer,
    int delay_blocks,
    float reverb_decay,
    ReverbModel* reverb_model,
    rtc::ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) {
  RTC_DCHECK(reverb_model);
  const size_t num_render_channels = spectrum_buffer.buffer[0].size();
  // Index of the render block matching the estimated delay, and the block
  // just past it (used to drive the reverb model).
  int idx_at_delay =
      spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks);
  int idx_past = spectrum_buffer.IncIndex(idx_at_delay);

  std::array<float, kFftLengthBy2Plus1> X2_data;
  rtc::ArrayView<const float> X2;
  if (num_render_channels > 1) {
    // Averages the per-channel spectra of band 0 into `render_power`.
    auto average_channels =
        [](size_t num_render_channels,
           rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
               spectrum_band_0,
           rtc::ArrayView<float, kFftLengthBy2Plus1> render_power) {
          std::fill(render_power.begin(), render_power.end(), 0.f);
          for (size_t ch = 0; ch < num_render_channels; ++ch) {
            for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
              render_power[k] += spectrum_band_0[ch][k];
            }
          }
          const float normalizer = 1.f / num_render_channels;
          for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
            render_power[k] *= normalizer;
          }
        };
    average_channels(num_render_channels, spectrum_buffer.buffer[idx_past],
                     X2_data);
    reverb_model->UpdateReverbNoFreqShaping(
        X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);

    // Re-use X2_data for the spectrum at the delay itself.
    average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
                     X2_data);
    X2 = X2_data;
  } else {
    // Single channel: no averaging needed, use the spectra directly.
    reverb_model->UpdateReverbNoFreqShaping(
        spectrum_buffer.buffer[idx_past][/*channel=*/0],
        /*power_spectrum_scaling=*/1.0f, reverb_decay);

    X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
  }

  // Output: delayed render power plus the modeled reverberation power.
  rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
      reverb_model->reverb();
  for (size_t k = 0; k < X2.size(); ++k) {
    reverb_power_spectrum[k] = X2[k] + reverb_power[k];
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::atomic<int> AecState::instance_count_(0);
|
||||
|
||||
// Computes the residual echo scaling, delegating to the echo audibility
// estimator once the filter-convergence heuristic has been evaluated.
void AecState::GetResidualEchoScaling(
    rtc::ArrayView<float> residual_scaling) const {
  // The number of strong, unsaturated render blocks needed before the filter
  // is considered converged depends on the configured initial phase.
  const float convergence_threshold_blocks =
      config_.filter.conservative_initial_phase ? 1.5f * kNumBlocksPerSecond
                                                : 0.8f * kNumBlocksPerSecond;
  const bool filter_has_had_time_to_converge =
      strong_not_saturated_render_blocks_ >= convergence_threshold_blocks;
  echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
                                          residual_scaling);
}
|
||||
|
||||
// Constructs the AEC state for `num_capture_channels` capture channels.
// Reads the reset-related field trials once at construction and sizes all
// per-channel estimators accordingly. Each instance gets a unique data-dumper
// id from the atomic instance counter.
AecState::AecState(const EchoCanceller3Config& config,
                   size_t num_capture_channels)
    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
      config_(config),
      num_capture_channels_(num_capture_channels),
      deactivate_initial_state_reset_at_echo_path_change_(
          DeactivateInitialStateResetAtEchoPathChange()),
      full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
      subtractor_analyzer_reset_at_echo_path_change_(
          SubtractorAnalyzerResetAtEchoPathChange()),
      initial_state_(config_),
      delay_state_(config_, num_capture_channels_),
      transparent_state_(TransparentMode::Create(config_)),
      filter_quality_state_(config_, num_capture_channels_),
      erl_estimator_(2 * kNumBlocksPerSecond),
      erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
      filter_analyzer_(config_, num_capture_channels_),
      echo_audibility_(
          config_.echo_audibility.use_stationarity_properties_at_init),
      reverb_model_estimator_(config_, num_capture_channels_),
      subtractor_output_analyzer_(num_capture_channels_) {}
|
||||
|
||||
AecState::~AecState() = default;
|
||||
|
||||
// Reacts to a reported echo path change. A delay change triggers a full reset
// of the adaptive state (unless disabled by field trial); a pure gain change
// only soft-resets the ERLE estimator.
void AecState::HandleEchoPathChange(
    const EchoPathVariability& echo_path_variability) {
  const auto full_reset = [&]() {
    filter_analyzer_.Reset();
    capture_signal_saturation_ = false;
    strong_not_saturated_render_blocks_ = 0;
    blocks_with_active_render_ = 0;
    if (!deactivate_initial_state_reset_at_echo_path_change_) {
      initial_state_.Reset();
    }
    if (transparent_state_) {
      transparent_state_->Reset();
    }
    erle_estimator_.Reset(true);
    erl_estimator_.Reset();
    filter_quality_state_.Reset();
  };

  // TODO(peah): Refine the reset scheme according to the type of gain and
  // delay adjustment.

  if (full_reset_at_echo_path_change_ &&
      echo_path_variability.delay_change !=
          EchoPathVariability::DelayAdjustment::kNone) {
    full_reset();
  } else if (echo_path_variability.gain_change) {
    erle_estimator_.Reset(false);
  }
  if (subtractor_analyzer_reset_at_echo_path_change_) {
    subtractor_output_analyzer_.HandleEchoPathChange();
  }
}
// Per-block update of the whole AEC state: analyzes the subtractor filters
// and their outputs, updates delay/ERL/ERLE/reverb/saturation estimates, and
// the initial-state and transparent-mode decisions. The update order matters:
// several later stages consume flags computed by earlier ones.
void AecState::Update(
    const absl::optional<DelayEstimate>& external_delay,
    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
        adaptive_filter_frequency_responses,
    rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
    rtc::ArrayView<const SubtractorOutput> subtractor_output) {
  RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
  RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
  RTC_DCHECK_EQ(num_capture_channels_,
                adaptive_filter_frequency_responses.size());
  RTC_DCHECK_EQ(num_capture_channels_,
                adaptive_filter_impulse_responses.size());

  // Analyze the filter outputs and filters.
  bool any_filter_converged;
  bool any_coarse_filter_converged;
  bool all_filters_diverged;
  subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
                                     &any_coarse_filter_converged,
                                     &all_filters_diverged);

  bool any_filter_consistent;
  float max_echo_path_gain;
  filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
                          &any_filter_consistent, &max_echo_path_gain);

  // Estimate the direct path delay of the filter.
  if (config_.filter.use_linear_filter) {
    delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
                        strong_not_saturated_render_blocks_);
  }

  // Render block aligned to the estimated direct-path delay.
  const Block& aligned_render_block =
      render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay());

  // Update render counters. A block counts as active render as soon as one
  // channel exceeds the configured per-sample energy limit.
  bool active_render = false;
  for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) {
    const float render_energy =
        std::inner_product(aligned_render_block.begin(/*block=*/0, ch),
                           aligned_render_block.end(/*block=*/0, ch),
                           aligned_render_block.begin(/*block=*/0, ch), 0.f);
    if (render_energy > (config_.render_levels.active_render_limit *
                         config_.render_levels.active_render_limit) *
                            kFftLengthBy2) {
      active_render = true;
      break;
    }
  }
  blocks_with_active_render_ += active_render ? 1 : 0;
  strong_not_saturated_render_blocks_ +=
      active_render && !SaturatedCapture() ? 1 : 0;

  std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;

  ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
                         delay_state_.MinDirectPathFilterDelay(),
                         ReverbDecay(/*mild=*/false), &avg_render_reverb_,
                         avg_render_spectrum_with_reverb);

  if (config_.echo_audibility.use_stationarity_properties) {
    // Update the echo audibility evaluator.
    echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
                            delay_state_.MinDirectPathFilterDelay(),
                            delay_state_.ExternalDelayReported());
  }

  // Update the ERL and ERLE measures.
  if (initial_state_.TransitionTriggered()) {
    erle_estimator_.Reset(false);
  }

  erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
                         avg_render_spectrum_with_reverb, Y2, E2_refined,
                         subtractor_output_analyzer_.ConvergedFilters());

  erl_estimator_.Update(
      subtractor_output_analyzer_.ConvergedFilters(),
      render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);

  // Detect and flag echo saturation.
  if (config_.ep_strength.echo_can_saturate) {
    saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
                                UsableLinearEstimate(), subtractor_output,
                                max_echo_path_gain);
  } else {
    RTC_DCHECK(!saturation_detector_.SaturatedEcho());
  }

  // Update the decision on whether to use the initial state parameter set.
  initial_state_.Update(active_render, SaturatedCapture());

  // Detect whether the transparent mode should be activated.
  if (transparent_state_) {
    transparent_state_->Update(
        delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
        any_filter_converged, any_coarse_filter_converged, all_filters_diverged,
        active_render, SaturatedCapture());
  }

  // Analyze the quality of the filter.
  filter_quality_state_.Update(active_render, TransparentModeActive(),
                               SaturatedCapture(), external_delay,
                               any_filter_converged);

  // Update the reverb estimate.
  const bool stationary_block =
      config_.echo_audibility.use_stationarity_properties &&
      echo_audibility_.IsBlockStationary();

  reverb_model_estimator_.Update(
      filter_analyzer_.GetAdjustedFilters(),
      adaptive_filter_frequency_responses,
      erle_estimator_.GetInstLinearQualityEstimates(),
      delay_state_.DirectPathFilterDelays(),
      filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);

  // Debug dumping of the per-block state (no-op in production builds).
  erle_estimator_.Dump(data_dumper_);
  reverb_model_estimator_.Dump(data_dumper_.get());
  data_dumper_->DumpRaw("aec3_active_render", active_render);
  data_dumper_->DumpRaw("aec3_erl", Erl());
  data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
  data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]);
  data_dumper_->DumpRaw("aec3_erle_onset_compensated",
                        Erle(/*onset_compensated=*/true)[0]);
  data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
  data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
  data_dumper_->DumpRaw("aec3_filter_delay",
                        filter_analyzer_.MinFilterDelayBlocks());

  data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
  data_dumper_->DumpRaw("aec3_initial_state",
                        initial_state_.InitialStateActive());
  data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
  data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
  data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
  data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
                        any_coarse_filter_converged);
  data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);

  data_dumper_->DumpRaw("aec3_external_delay_avaliable",
                        external_delay ? 1 : 0);
  data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
                        GetReverbFrequencyResponse());
  data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2);
  data_dumper_->DumpRaw("aec3_subtractor_e2_coarse",
                        subtractor_output[0].e2_coarse);
  data_dumper_->DumpRaw("aec3_subtractor_e2_refined",
                        subtractor_output[0].e2_refined);
}
// Latches the configuration that controls how long the initial state lasts
// and starts out in the initial state.
AecState::InitialState::InitialState(const EchoCanceller3Config& config)
    : conservative_initial_phase_(config.filter.conservative_initial_phase),
      initial_state_seconds_(config.filter.initial_state_seconds) {
  Reset();
}
// Resets the state to again begin in the initial state.
// Note: the original spelled the nested-name-specifier as
// `AecState::InitialState::InitialState::Reset()`, relying on the
// injected-class-name; the redundant qualifier is dropped here.
void AecState::InitialState::Reset() {
  initial_state_ = true;
  strong_not_saturated_render_blocks_ = 0;
}
void AecState::InitialState::InitialState::Update(bool active_render,
|
||||
bool saturated_capture) {
|
||||
strong_not_saturated_render_blocks_ +=
|
||||
active_render && !saturated_capture ? 1 : 0;
|
||||
|
||||
// Flag whether the initial state is still active.
|
||||
bool prev_initial_state = initial_state_;
|
||||
if (conservative_initial_phase_) {
|
||||
initial_state_ =
|
||||
strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
|
||||
} else {
|
||||
initial_state_ = strong_not_saturated_render_blocks_ <
|
||||
initial_state_seconds_ * kNumBlocksPerSecond;
|
||||
}
|
||||
|
||||
// Flag whether the transition from the initial state has started.
|
||||
transition_triggered_ = !initial_state_ && prev_initial_state;
|
||||
}
|
||||
|
||||
// Initializes all per-channel filter delays (and their minimum) to the
// configured delay headroom expressed in blocks.
AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
                                   size_t num_capture_channels)
    : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
      filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
      min_filter_delay_(delay_headroom_blocks_) {}
// Updates the per-channel direct-path delay estimates and their minimum.
// While the filter may not yet have converged, the analyzer estimates are
// ignored in favor of the configured headroom.
void AecState::FilterDelay::Update(
    rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
    const absl::optional<DelayEstimate>& external_delay,
    size_t blocks_with_proper_filter_adaptation) {
  // Update the delay based on the external delay.
  if (external_delay &&
      (!external_delay_ || external_delay_->delay != external_delay->delay)) {
    external_delay_ = external_delay;
    external_delay_reported_ = true;
  }

  // Override the estimated delay if it is not certain that the filter has had
  // time to converge.
  const bool delay_estimator_may_not_have_converged =
      blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
  if (delay_estimator_may_not_have_converged && external_delay_) {
    const int delay_guess = delay_headroom_blocks_;
    std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
              delay_guess);
  } else {
    RTC_DCHECK_EQ(filter_delays_blocks_.size(),
                  analyzer_filter_delay_estimates_blocks.size());
    std::copy(analyzer_filter_delay_estimates_blocks.begin(),
              analyzer_filter_delay_estimates_blocks.end(),
              filter_delays_blocks_.begin());
  }

  min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
                                        filter_delays_blocks_.end());
}
// Starts out with all per-channel linear-filter estimates flagged unusable.
AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
    const EchoCanceller3Config& config,
    size_t num_capture_channels)
    : use_linear_filter_(config.filter.use_linear_filter),
      usable_linear_filter_estimates_(num_capture_channels, false) {}
// Marks all filter-quality estimates as unusable and restarts the in-call
// adaptation counter (the since-start counter is deliberately kept).
void AecState::FilteringQualityAnalyzer::Reset() {
  overall_usable_linear_estimates_ = false;
  filter_update_blocks_since_reset_ = 0;
  usable_linear_filter_estimates_.assign(usable_linear_filter_estimates_.size(),
                                         false);
}
// Decides, per block, whether the linear filter output is usable, based on
// how long the filter has been adapting, whether convergence or an external
// delay has ever been seen, and whether transparent mode is active.
void AecState::FilteringQualityAnalyzer::Update(
    bool active_render,
    bool transparent_mode,
    bool saturated_capture,
    const absl::optional<DelayEstimate>& external_delay,
    bool any_filter_converged) {
  // Update blocks counter.
  const bool filter_update = active_render && !saturated_capture;
  filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
  filter_update_blocks_since_start_ += filter_update ? 1 : 0;

  // Store convergence flag when observed (sticky for the call).
  convergence_seen_ = convergence_seen_ || any_filter_converged;

  // Verify requirements for achieving a decent filter. The requirements for
  // filter adaptation at call startup are more restrictive than after an
  // in-call reset.
  const bool sufficient_data_to_converge_at_startup =
      filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
  const bool sufficient_data_to_converge_at_reset =
      sufficient_data_to_converge_at_startup &&
      filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;

  // The linear filter can only be used if it has had time to converge.
  overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
                                     sufficient_data_to_converge_at_reset;

  // The linear filter can only be used if an external delay or convergence
  // have been identified.
  overall_usable_linear_estimates_ =
      overall_usable_linear_estimates_ && (external_delay || convergence_seen_);

  // If transparent mode is on, deactivate using the linear filter.
  overall_usable_linear_estimates_ =
      overall_usable_linear_estimates_ && !transparent_mode;

  if (use_linear_filter_) {
    std::fill(usable_linear_filter_estimates_.begin(),
              usable_linear_filter_estimates_.end(),
              overall_usable_linear_estimates_);
  }
}
// Flags whether the echo is to be considered saturated for this block. Only
// relevant when the capture itself is saturated: with a usable linear
// estimate, the subtractor's echo-estimate peaks are checked directly;
// otherwise the render peak scaled by the echo-path gain (plus a safety
// margin) is compared against a near-full-scale threshold.
void AecState::SaturationDetector::Update(
    const Block& x,
    bool saturated_capture,
    bool usable_linear_estimate,
    rtc::ArrayView<const SubtractorOutput> subtractor_output,
    float echo_path_gain) {
  saturated_echo_ = false;
  if (!saturated_capture) {
    return;
  }

  if (usable_linear_estimate) {
    constexpr float kSaturationThreshold = 20000.f;
    for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
      saturated_echo_ =
          saturated_echo_ ||
          (subtractor_output[ch].s_refined_max_abs > kSaturationThreshold ||
           subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold);
    }
  } else {
    // No usable linear estimate: bound the echo peak by the render peak
    // scaled with the (maximum) echo path gain.
    float max_sample = 0.f;
    for (int ch = 0; ch < x.NumChannels(); ++ch) {
      rtc::ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
      for (float sample : x_ch) {
        max_sample = std::max(max_sample, fabsf(sample));
      }
    }

    const float kMargin = 10.f;
    float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
    saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000;
  }
}
|
||||
|
||||
} // namespace webrtc
|
300
VocieProcess/modules/audio_processing/aec3/aec_state.h
Normal file
300
VocieProcess/modules/audio_processing/aec3/aec_state.h
Normal file
@ -0,0 +1,300 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/echo_audibility.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/erl_estimator.h"
|
||||
#include "modules/audio_processing/aec3/erle_estimator.h"
|
||||
#include "modules/audio_processing/aec3/filter_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/reverb_model_estimator.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/transparent_mode.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
// Handles the state and the conditions for the echo removal functionality.
class AecState {
 public:
  AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
  ~AecState();

  // Returns whether the echo subtractor can be used to determine the residual
  // echo.
  bool UsableLinearEstimate() const {
    return filter_quality_state_.LinearFilterUsable() &&
           config_.filter.use_linear_filter;
  }

  // Returns whether the echo subtractor output should be used as output.
  bool UseLinearFilterOutput() const {
    return filter_quality_state_.LinearFilterUsable() &&
           config_.filter.use_linear_filter;
  }

  // Returns whether the render signal is currently active.
  bool ActiveRender() const { return blocks_with_active_render_ > 200; }

  // Returns the appropriate scaling of the residual echo to match the
  // audibility.
  void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;

  // Returns whether the stationary properties of the signals are used in the
  // aec.
  bool UseStationarityProperties() const {
    return config_.echo_audibility.use_stationarity_properties;
  }

  // Returns the ERLE (echo return loss enhancement).
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
      bool onset_compensated) const {
    return erle_estimator_.Erle(onset_compensated);
  }

  // Returns the non-capped ERLE.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
      const {
    return erle_estimator_.ErleUnbounded();
  }

  // Returns the fullband ERLE estimate in log2 units.
  float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }

  // Returns the ERL (echo return loss).
  const std::array<float, kFftLengthBy2Plus1>& Erl() const {
    return erl_estimator_.Erl();
  }

  // Returns the time-domain ERL.
  float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }

  // Returns the delay estimate based on the linear filter.
  int MinDirectPathFilterDelay() const {
    return delay_state_.MinDirectPathFilterDelay();
  }

  // Returns whether the capture signal is saturated.
  bool SaturatedCapture() const { return capture_signal_saturation_; }

  // Returns whether the echo signal is saturated.
  bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }

  // Updates the capture signal saturation.
  void UpdateCaptureSaturation(bool capture_signal_saturation) {
    capture_signal_saturation_ = capture_signal_saturation;
  }

  // Returns whether the transparent mode is active.
  bool TransparentModeActive() const {
    return transparent_state_ && transparent_state_->Active();
  }

  // Takes appropriate action at an echo path change.
  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);

  // Returns the decay factor for the echo reverberation. The parameter `mild`
  // indicates which exponential decay to return. The default one or a milder
  // one that can be used during nearend regions.
  float ReverbDecay(bool mild) const {
    return reverb_model_estimator_.ReverbDecay(mild);
  }

  // Returns the frequency response of the reverberant echo.
  rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
    return reverb_model_estimator_.GetReverbFrequencyResponse();
  }

  // Returns whether the transition for going out of the initial state has
  // been triggered.
  bool TransitionTriggered() const {
    return initial_state_.TransitionTriggered();
  }

  // Updates the aec state.
  // TODO(bugs.webrtc.org/10913): Compute multi-channel ERL.
  void Update(
      const absl::optional<DelayEstimate>& external_delay,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          adaptive_filter_frequency_responses,
      rtc::ArrayView<const std::vector<float>>
          adaptive_filter_impulse_responses,
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
      rtc::ArrayView<const SubtractorOutput> subtractor_output);

  // Returns filter length in blocks.
  int FilterLengthBlocks() const {
    // All filters have the same length, so arbitrarily return channel 0 length.
    return filter_analyzer_.FilterLengthBlocks();
  }

 private:
  static std::atomic<int> instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  const size_t num_capture_channels_;
  const bool deactivate_initial_state_reset_at_echo_path_change_;
  const bool full_reset_at_echo_path_change_;
  const bool subtractor_analyzer_reset_at_echo_path_change_;

  // Class for controlling the transition from the initial state, which in turn
  // controls when the filter parameters for the initial state should be used.
  class InitialState {
   public:
    explicit InitialState(const EchoCanceller3Config& config);
    // Resets the state to again begin in the initial state.
    void Reset();

    // Updates the state based on new data.
    void Update(bool active_render, bool saturated_capture);

    // Returns whether the initial state is active or not.
    bool InitialStateActive() const { return initial_state_; }

    // Returns whether the transition from the initial state was started.
    bool TransitionTriggered() const { return transition_triggered_; }

   private:
    const bool conservative_initial_phase_;
    const float initial_state_seconds_;
    bool transition_triggered_ = false;
    bool initial_state_ = true;
    size_t strong_not_saturated_render_blocks_ = 0;
  } initial_state_;

  // Class for choosing the direct-path delay relative to the beginning of the
  // filter, as well as any other data related to the delay used within
  // AecState.
  class FilterDelay {
   public:
    FilterDelay(const EchoCanceller3Config& config,
                size_t num_capture_channels);

    // Returns whether an external delay has been reported to the AecState (from
    // the delay estimator).
    bool ExternalDelayReported() const { return external_delay_reported_; }

    // Returns the delay in blocks relative to the beginning of the filter that
    // corresponds to the direct path of the echo.
    rtc::ArrayView<const int> DirectPathFilterDelays() const {
      return filter_delays_blocks_;
    }

    // Returns the minimum delay among the direct path delays relative to the
    // beginning of the filter.
    int MinDirectPathFilterDelay() const { return min_filter_delay_; }

    // Updates the delay estimates based on new data.
    void Update(
        rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
        const absl::optional<DelayEstimate>& external_delay,
        size_t blocks_with_proper_filter_adaptation);

   private:
    const int delay_headroom_blocks_;
    bool external_delay_reported_ = false;
    std::vector<int> filter_delays_blocks_;
    int min_filter_delay_;
    absl::optional<DelayEstimate> external_delay_;
  } delay_state_;

  // Classifier for toggling transparent mode when there is no echo.
  std::unique_ptr<TransparentMode> transparent_state_;

  // Class for analyzing how well the linear filter is, and can be expected to,
  // perform on the current signals. The purpose of this is to select the echo
  // suppression functionality as well as the input to the echo suppressor.
  class FilteringQualityAnalyzer {
   public:
    FilteringQualityAnalyzer(const EchoCanceller3Config& config,
                             size_t num_capture_channels);

    // Returns whether the linear filter can be used for the echo
    // canceller output.
    bool LinearFilterUsable() const { return overall_usable_linear_estimates_; }

    // Returns whether an individual filter output can be used for the echo
    // canceller output.
    const std::vector<bool>& UsableLinearFilterOutputs() const {
      return usable_linear_filter_estimates_;
    }

    // Resets the state of the analyzer.
    void Reset();

    // Updates the analysis based on new data.
    void Update(bool active_render,
                bool transparent_mode,
                bool saturated_capture,
                const absl::optional<DelayEstimate>& external_delay,
                bool any_filter_converged);

   private:
    const bool use_linear_filter_;
    bool overall_usable_linear_estimates_ = false;
    size_t filter_update_blocks_since_reset_ = 0;
    size_t filter_update_blocks_since_start_ = 0;
    bool convergence_seen_ = false;
    std::vector<bool> usable_linear_filter_estimates_;
  } filter_quality_state_;

  // Class for detecting whether the echo is to be considered to be
  // saturated.
  class SaturationDetector {
   public:
    // Returns whether the echo is to be considered saturated.
    bool SaturatedEcho() const { return saturated_echo_; }

    // Updates the detection decision based on new data.
    void Update(const Block& x,
                bool saturated_capture,
                bool usable_linear_estimate,
                rtc::ArrayView<const SubtractorOutput> subtractor_output,
                float echo_path_gain);

   private:
    bool saturated_echo_ = false;
  } saturation_detector_;

  ErlEstimator erl_estimator_;
  ErleEstimator erle_estimator_;
  size_t strong_not_saturated_render_blocks_ = 0;
  size_t blocks_with_active_render_ = 0;
  bool capture_signal_saturation_ = false;
  FilterAnalyzer filter_analyzer_;
  EchoAudibility echo_audibility_;
  ReverbModelEstimator reverb_model_estimator_;
  ReverbModel avg_render_reverb_;
  SubtractorOutputAnalyzer subtractor_output_analyzer_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
|
163
VocieProcess/modules/audio_processing/aec3/alignment_mixer.cc
Normal file
163
VocieProcess/modules/audio_processing/aec3/alignment_mixer.cc
Normal file
@ -0,0 +1,163 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/alignment_mixer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Maps the mixing configuration onto the mixing scheme to apply. Mono input
// always uses the fixed variant; otherwise the configuration flags decide,
// with fixed selection as the fallback.
AlignmentMixer::MixingVariant ChooseMixingVariant(bool downmix,
                                                  bool adaptive_selection,
                                                  int num_channels) {
  RTC_DCHECK(!(adaptive_selection && downmix));
  RTC_DCHECK_LT(0, num_channels);

  using MixingVariant = AlignmentMixer::MixingVariant;
  if (num_channels > 1) {
    if (downmix) {
      return MixingVariant::kDownmix;
    }
    if (adaptive_selection) {
      return MixingVariant::kAdaptive;
    }
  }
  return MixingVariant::kFixed;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Convenience constructor: unpacks the alignment-mixing config and delegates
// to the fully parameterized constructor.
AlignmentMixer::AlignmentMixer(
    size_t num_channels,
    const EchoCanceller3Config::Delay::AlignmentMixing& config)
    : AlignmentMixer(num_channels,
                     config.downmix,
                     config.adaptive_selection,
                     config.activity_power_threshold,
                     config.prefer_first_two_channels) {}
|
||||
|
||||
// Fully parameterized constructor. The adaptive-selection bookkeeping
// (per-channel strong-block counters and cumulative energies) is only set up
// when the adaptive variant was chosen.
AlignmentMixer::AlignmentMixer(size_t num_channels,
                               bool downmix,
                               bool adaptive_selection,
                               float activity_power_threshold,
                               bool prefer_first_two_channels)
    : num_channels_(num_channels),
      one_by_num_channels_(1.f / num_channels_),
      excitation_energy_threshold_(kBlockSize * activity_power_threshold),
      prefer_first_two_channels_(prefer_first_two_channels),
      selection_variant_(
          ChooseMixingVariant(downmix, adaptive_selection, num_channels_)) {
  if (selection_variant_ == MixingVariant::kAdaptive) {
    std::fill(strong_block_counters_.begin(), strong_block_counters_.end(), 0);
    cumulative_energies_.resize(num_channels_);
    std::fill(cumulative_energies_.begin(), cumulative_energies_.end(), 0.f);
  }
}
|
||||
|
||||
void AlignmentMixer::ProduceOutput(const Block& x,
|
||||
rtc::ArrayView<float, kBlockSize> y) {
|
||||
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
|
||||
|
||||
if (selection_variant_ == MixingVariant::kDownmix) {
|
||||
Downmix(x, y);
|
||||
return;
|
||||
}
|
||||
|
||||
int ch = selection_variant_ == MixingVariant::kFixed ? 0 : SelectChannel(x);
|
||||
|
||||
RTC_DCHECK_GT(x.NumChannels(), ch);
|
||||
std::copy(x.begin(/*band=*/0, ch), x.end(/*band=*/0, ch), y.begin());
|
||||
}
|
||||
|
||||
void AlignmentMixer::Downmix(const Block& x,
|
||||
rtc::ArrayView<float, kBlockSize> y) const {
|
||||
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
|
||||
RTC_DCHECK_GE(num_channels_, 2);
|
||||
std::memcpy(&y[0], x.View(/*band=*/0, /*channel=*/0).data(),
|
||||
kBlockSize * sizeof(y[0]));
|
||||
for (size_t ch = 1; ch < num_channels_; ++ch) {
|
||||
const auto x_ch = x.View(/*band=*/0, ch);
|
||||
for (size_t i = 0; i < kBlockSize; ++i) {
|
||||
y[i] += x_ch[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < kBlockSize; ++i) {
|
||||
y[i] *= one_by_num_channels_;
|
||||
}
|
||||
}
|
||||
|
||||
// Adaptively selects the channel to use for alignment, preferring the first
// two channels when they carry a strong signal, tracking per-channel energy
// (cumulative at first, exponentially smoothed later), and switching only
// when another channel is clearly stronger (2x hysteresis).
int AlignmentMixer::SelectChannel(const Block& x) {
  RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
  RTC_DCHECK_GE(num_channels_, 2);
  RTC_DCHECK_EQ(cumulative_energies_.size(), num_channels_);

  constexpr size_t kBlocksToChooseLeftOrRight =
      static_cast<size_t>(0.5f * kNumBlocksPerSecond);
  const bool good_signal_in_left_or_right =
      prefer_first_two_channels_ &&
      (strong_block_counters_[0] > kBlocksToChooseLeftOrRight ||
       strong_block_counters_[1] > kBlocksToChooseLeftOrRight);

  // Restrict the analysis to the first two channels once one of them has
  // proven strong enough.
  const int num_ch_to_analyze =
      good_signal_in_left_or_right ? 2 : num_channels_;

  constexpr int kNumBlocksBeforeEnergySmoothing = 60 * kNumBlocksPerSecond;
  ++block_counter_;

  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
    float x2_sum = 0.f;
    rtc::ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
    for (size_t i = 0; i < kBlockSize; ++i) {
      x2_sum += x_ch[i] * x_ch[i];
    }

    if (ch < 2 && x2_sum > excitation_energy_threshold_) {
      ++strong_block_counters_[ch];
    }

    if (block_counter_ <= kNumBlocksBeforeEnergySmoothing) {
      cumulative_energies_[ch] += x2_sum;
    } else {
      constexpr float kSmoothing = 1.f / (10 * kNumBlocksPerSecond);
      cumulative_energies_[ch] +=
          kSmoothing * (x2_sum - cumulative_energies_[ch]);
    }
  }

  // Normalize the energies to allow the energy computations to from now be
  // based on smoothing.
  if (block_counter_ == kNumBlocksBeforeEnergySmoothing) {
    constexpr float kOneByNumBlocksBeforeEnergySmoothing =
        1.f / kNumBlocksBeforeEnergySmoothing;
    for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
      cumulative_energies_[ch] *= kOneByNumBlocksBeforeEnergySmoothing;
    }
  }

  // Identify the strongest channel among those analyzed.
  int strongest_ch = 0;
  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
    if (cumulative_energies_[ch] > cumulative_energies_[strongest_ch]) {
      strongest_ch = ch;
    }
  }

  // Switch channel only if forced (current selection no longer analyzed) or
  // if the strongest channel clearly dominates the current one.
  if ((good_signal_in_left_or_right && selected_channel_ > 1) ||
      cumulative_energies_[strongest_ch] >
          2.f * cumulative_energies_[selected_channel_]) {
    selected_channel_ = strongest_ch;
  }

  return selected_channel_;
}
|
||||
|
||||
} // namespace webrtc
|
57
VocieProcess/modules/audio_processing/aec3/alignment_mixer.h
Normal file
57
VocieProcess/modules/audio_processing/aec3/alignment_mixer.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Performs channel conversion to mono for the purpose of providing a decent
|
||||
// mono input for the delay estimation. This is achieved by analyzing all
|
||||
// incoming channels and produce one single channel output.
|
||||
class AlignmentMixer {
|
||||
public:
|
||||
AlignmentMixer(size_t num_channels,
|
||||
const EchoCanceller3Config::Delay::AlignmentMixing& config);
|
||||
|
||||
AlignmentMixer(size_t num_channels,
|
||||
bool downmix,
|
||||
bool adaptive_selection,
|
||||
float excitation_limit,
|
||||
bool prefer_first_two_channels);
|
||||
|
||||
void ProduceOutput(const Block& x, rtc::ArrayView<float, kBlockSize> y);
|
||||
|
||||
enum class MixingVariant { kDownmix, kAdaptive, kFixed };
|
||||
|
||||
private:
|
||||
const size_t num_channels_;
|
||||
const float one_by_num_channels_;
|
||||
const float excitation_energy_threshold_;
|
||||
const bool prefer_first_two_channels_;
|
||||
const MixingVariant selection_variant_;
|
||||
std::array<size_t, 2> strong_block_counters_;
|
||||
std::vector<float> cumulative_energies_;
|
||||
int selected_channel_ = 0;
|
||||
size_t block_counter_ = 0;
|
||||
|
||||
void Downmix(const Block& x, rtc::ArrayView<float, kBlockSize> y) const;
|
||||
int SelectChannel(const Block& x);
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
|
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
bool TimeToReportMetrics(int frames_since_last_report) {
|
||||
constexpr int kNumFramesPerSecond = 100;
|
||||
constexpr int kReportingIntervalFrames = 10 * kNumFramesPerSecond;
|
||||
return frames_since_last_report == kReportingIntervalFrames;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
ApiCallJitterMetrics::Jitter::Jitter()
|
||||
: max_(0), min_(std::numeric_limits<int>::max()) {}
|
||||
|
||||
void ApiCallJitterMetrics::Jitter::Update(int num_api_calls_in_a_row) {
|
||||
min_ = std::min(min_, num_api_calls_in_a_row);
|
||||
max_ = std::max(max_, num_api_calls_in_a_row);
|
||||
}
|
||||
|
||||
void ApiCallJitterMetrics::Jitter::Reset() {
|
||||
min_ = std::numeric_limits<int>::max();
|
||||
max_ = 0;
|
||||
}
|
||||
|
||||
void ApiCallJitterMetrics::Reset() {
|
||||
render_jitter_.Reset();
|
||||
capture_jitter_.Reset();
|
||||
num_api_calls_in_a_row_ = 0;
|
||||
frames_since_last_report_ = 0;
|
||||
last_call_was_render_ = false;
|
||||
proper_call_observed_ = false;
|
||||
}
|
||||
|
||||
void ApiCallJitterMetrics::ReportRenderCall() {
|
||||
if (!last_call_was_render_) {
|
||||
// If the previous call was a capture and a proper call has been observed
|
||||
// (containing both render and capture data), storing the last number of
|
||||
// capture calls into the metrics.
|
||||
if (proper_call_observed_) {
|
||||
capture_jitter_.Update(num_api_calls_in_a_row_);
|
||||
}
|
||||
|
||||
// Reset the call counter to start counting render calls.
|
||||
num_api_calls_in_a_row_ = 0;
|
||||
}
|
||||
++num_api_calls_in_a_row_;
|
||||
last_call_was_render_ = true;
|
||||
}
|
||||
|
||||
void ApiCallJitterMetrics::ReportCaptureCall() {
|
||||
if (last_call_was_render_) {
|
||||
// If the previous call was a render and a proper call has been observed
|
||||
// (containing both render and capture data), storing the last number of
|
||||
// render calls into the metrics.
|
||||
if (proper_call_observed_) {
|
||||
render_jitter_.Update(num_api_calls_in_a_row_);
|
||||
}
|
||||
// Reset the call counter to start counting capture calls.
|
||||
num_api_calls_in_a_row_ = 0;
|
||||
|
||||
// If this statement is reached, at least one render and one capture call
|
||||
// have been observed.
|
||||
proper_call_observed_ = true;
|
||||
}
|
||||
++num_api_calls_in_a_row_;
|
||||
last_call_was_render_ = false;
|
||||
|
||||
// Only report and update jitter metrics for when a proper call, containing
|
||||
// both render and capture data, has been observed.
|
||||
if (proper_call_observed_ &&
|
||||
TimeToReportMetrics(++frames_since_last_report_)) {
|
||||
// Report jitter, where the base basic unit is frames.
|
||||
constexpr int kMaxJitterToReport = 50;
|
||||
|
||||
// Report max and min jitter for render and capture, in units of 20 ms.
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.MaxRenderJitter",
|
||||
std::min(kMaxJitterToReport, render_jitter().max()), 1,
|
||||
kMaxJitterToReport, kMaxJitterToReport);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.MinRenderJitter",
|
||||
std::min(kMaxJitterToReport, render_jitter().min()), 1,
|
||||
kMaxJitterToReport, kMaxJitterToReport);
|
||||
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.MaxCaptureJitter",
|
||||
std::min(kMaxJitterToReport, capture_jitter().max()), 1,
|
||||
kMaxJitterToReport, kMaxJitterToReport);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.MinCaptureJitter",
|
||||
std::min(kMaxJitterToReport, capture_jitter().min()), 1,
|
||||
kMaxJitterToReport, kMaxJitterToReport);
|
||||
|
||||
frames_since_last_report_ = 0;
|
||||
Reset();
|
||||
}
|
||||
}
|
||||
|
||||
bool ApiCallJitterMetrics::WillReportMetricsAtNextCapture() const {
|
||||
return TimeToReportMetrics(frames_since_last_report_ + 1);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores data for reporting metrics on the API call jitter.
|
||||
class ApiCallJitterMetrics {
|
||||
public:
|
||||
class Jitter {
|
||||
public:
|
||||
Jitter();
|
||||
void Update(int num_api_calls_in_a_row);
|
||||
void Reset();
|
||||
|
||||
int min() const { return min_; }
|
||||
int max() const { return max_; }
|
||||
|
||||
private:
|
||||
int max_;
|
||||
int min_;
|
||||
};
|
||||
|
||||
ApiCallJitterMetrics() { Reset(); }
|
||||
|
||||
// Update metrics for render API call.
|
||||
void ReportRenderCall();
|
||||
|
||||
// Update and periodically report metrics for capture API call.
|
||||
void ReportCaptureCall();
|
||||
|
||||
// Methods used only for testing.
|
||||
const Jitter& render_jitter() const { return render_jitter_; }
|
||||
const Jitter& capture_jitter() const { return capture_jitter_; }
|
||||
bool WillReportMetricsAtNextCapture() const;
|
||||
|
||||
private:
|
||||
void Reset();
|
||||
|
||||
Jitter render_jitter_;
|
||||
Jitter capture_jitter_;
|
||||
|
||||
int num_api_calls_in_a_row_ = 0;
|
||||
int frames_since_last_report_ = 0;
|
||||
bool last_call_was_render_ = false;
|
||||
bool proper_call_observed_ = false;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
|
91
VocieProcess/modules/audio_processing/aec3/block.h
Normal file
91
VocieProcess/modules/audio_processing/aec3/block.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Contains one or more channels of 4 milliseconds of audio data.
|
||||
// The audio is split in one or more frequency bands, each with a sampling
|
||||
// rate of 16 kHz.
|
||||
class Block {
|
||||
public:
|
||||
Block(int num_bands, int num_channels, float default_value = 0.0f)
|
||||
: num_bands_(num_bands),
|
||||
num_channels_(num_channels),
|
||||
data_(num_bands * num_channels * kBlockSize, default_value) {}
|
||||
|
||||
// Returns the number of bands.
|
||||
int NumBands() const { return num_bands_; }
|
||||
|
||||
// Returns the number of channels.
|
||||
int NumChannels() const { return num_channels_; }
|
||||
|
||||
// Modifies the number of channels and sets all samples to zero.
|
||||
void SetNumChannels(int num_channels) {
|
||||
num_channels_ = num_channels;
|
||||
data_.resize(num_bands_ * num_channels_ * kBlockSize);
|
||||
std::fill(data_.begin(), data_.end(), 0.0f);
|
||||
}
|
||||
|
||||
// Iterators for accessing the data.
|
||||
auto begin(int band, int channel) {
|
||||
return data_.begin() + GetIndex(band, channel);
|
||||
}
|
||||
|
||||
auto begin(int band, int channel) const {
|
||||
return data_.begin() + GetIndex(band, channel);
|
||||
}
|
||||
|
||||
auto end(int band, int channel) { return begin(band, channel) + kBlockSize; }
|
||||
|
||||
auto end(int band, int channel) const {
|
||||
return begin(band, channel) + kBlockSize;
|
||||
}
|
||||
|
||||
// Access data via ArrayView.
|
||||
rtc::ArrayView<float, kBlockSize> View(int band, int channel) {
|
||||
return rtc::ArrayView<float, kBlockSize>(&data_[GetIndex(band, channel)],
|
||||
kBlockSize);
|
||||
}
|
||||
|
||||
rtc::ArrayView<const float, kBlockSize> View(int band, int channel) const {
|
||||
return rtc::ArrayView<const float, kBlockSize>(
|
||||
&data_[GetIndex(band, channel)], kBlockSize);
|
||||
}
|
||||
|
||||
// Lets two Blocks swap audio data.
|
||||
void Swap(Block& b) {
|
||||
std::swap(num_bands_, b.num_bands_);
|
||||
std::swap(num_channels_, b.num_channels_);
|
||||
data_.swap(b.data_);
|
||||
}
|
||||
|
||||
private:
|
||||
// Returns the index of the first sample of the requested |band| and
|
||||
// |channel|.
|
||||
int GetIndex(int band, int channel) const {
|
||||
return (band * num_channels_ + channel) * kBlockSize;
|
||||
}
|
||||
|
||||
int num_bands_;
|
||||
int num_channels_;
|
||||
std::vector<float> data_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
|
23
VocieProcess/modules/audio_processing/aec3/block_buffer.cc
Normal file
23
VocieProcess/modules/audio_processing/aec3/block_buffer.cc
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
BlockBuffer::BlockBuffer(size_t size, size_t num_bands, size_t num_channels)
|
||||
: size(static_cast<int>(size)),
|
||||
buffer(size, Block(num_bands, num_channels)) {}
|
||||
|
||||
BlockBuffer::~BlockBuffer() = default;
|
||||
|
||||
} // namespace webrtc
|
60
VocieProcess/modules/audio_processing/aec3/block_buffer.h
Normal file
60
VocieProcess/modules/audio_processing/aec3/block_buffer.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Struct for bundling a circular buffer of two dimensional vector objects
|
||||
// together with the read and write indices.
|
||||
struct BlockBuffer {
|
||||
BlockBuffer(size_t size, size_t num_bands, size_t num_channels);
|
||||
~BlockBuffer();
|
||||
|
||||
int IncIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index < size - 1 ? index + 1 : 0;
|
||||
}
|
||||
|
||||
int DecIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index > 0 ? index - 1 : size - 1;
|
||||
}
|
||||
|
||||
int OffsetIndex(int index, int offset) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
RTC_DCHECK_GE(size, offset);
|
||||
return (size + index + offset) % size;
|
||||
}
|
||||
|
||||
void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
|
||||
void IncWriteIndex() { write = IncIndex(write); }
|
||||
void DecWriteIndex() { write = DecIndex(write); }
|
||||
void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
|
||||
void IncReadIndex() { read = IncIndex(read); }
|
||||
void DecReadIndex() { read = DecIndex(read); }
|
||||
|
||||
const int size;
|
||||
std::vector<Block> buffer;
|
||||
int write = 0;
|
||||
int read = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
|
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/block_delay_buffer.h"
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
BlockDelayBuffer::BlockDelayBuffer(size_t num_channels,
|
||||
size_t num_bands,
|
||||
size_t frame_length,
|
||||
size_t delay_samples)
|
||||
: frame_length_(frame_length),
|
||||
delay_(delay_samples),
|
||||
buf_(num_channels,
|
||||
std::vector<std::vector<float>>(num_bands,
|
||||
std::vector<float>(delay_, 0.f))) {}
|
||||
|
||||
BlockDelayBuffer::~BlockDelayBuffer() = default;
|
||||
|
||||
void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) {
|
||||
RTC_DCHECK_EQ(buf_.size(), frame->num_channels());
|
||||
if (delay_ == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t num_bands = buf_[0].size();
|
||||
const size_t num_channels = buf_.size();
|
||||
|
||||
const size_t i_start = last_insert_;
|
||||
size_t i = 0;
|
||||
for (size_t ch = 0; ch < num_channels; ++ch) {
|
||||
RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands());
|
||||
RTC_DCHECK_EQ(buf_[ch].size(), num_bands);
|
||||
rtc::ArrayView<float* const> frame_ch(frame->split_bands(ch), num_bands);
|
||||
const size_t delay = delay_;
|
||||
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
RTC_DCHECK_EQ(delay_, buf_[ch][band].size());
|
||||
i = i_start;
|
||||
|
||||
// Offloading these pointers and class variables to local variables allows
|
||||
// the compiler to optimize the below loop when compiling with
|
||||
// '-fno-strict-aliasing'.
|
||||
float* buf_ch_band = buf_[ch][band].data();
|
||||
float* frame_ch_band = frame_ch[band];
|
||||
|
||||
for (size_t k = 0, frame_length = frame_length_; k < frame_length; ++k) {
|
||||
const float tmp = buf_ch_band[i];
|
||||
buf_ch_band[i] = frame_ch_band[k];
|
||||
frame_ch_band[k] = tmp;
|
||||
|
||||
i = i < delay - 1 ? i + 1 : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_insert_ = i;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for applying a fixed delay to the samples in a signal partitioned using
|
||||
// the audiobuffer band-splitting scheme.
|
||||
class BlockDelayBuffer {
|
||||
public:
|
||||
BlockDelayBuffer(size_t num_channels,
|
||||
size_t num_bands,
|
||||
size_t frame_length,
|
||||
size_t delay_samples);
|
||||
~BlockDelayBuffer();
|
||||
|
||||
// Delays the samples by the specified delay.
|
||||
void DelaySignal(AudioBuffer* frame);
|
||||
|
||||
private:
|
||||
const size_t frame_length_;
|
||||
const size_t delay_;
|
||||
std::vector<std::vector<std::vector<float>>> buf_;
|
||||
size_t last_insert_ = 0;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
|
83
VocieProcess/modules/audio_processing/aec3/block_framer.cc
Normal file
83
VocieProcess/modules/audio_processing/aec3/block_framer.cc
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/block_framer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
BlockFramer::BlockFramer(size_t num_bands, size_t num_channels)
|
||||
: num_bands_(num_bands),
|
||||
num_channels_(num_channels),
|
||||
buffer_(num_bands_,
|
||||
std::vector<std::vector<float>>(
|
||||
num_channels,
|
||||
std::vector<float>(kBlockSize, 0.f))) {
|
||||
RTC_DCHECK_LT(0, num_bands);
|
||||
RTC_DCHECK_LT(0, num_channels);
|
||||
}
|
||||
|
||||
BlockFramer::~BlockFramer() = default;
|
||||
|
||||
// All the constants are chosen so that the buffer is either empty or has enough
|
||||
// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to
|
||||
// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need
|
||||
// to be called in the correct order.
|
||||
void BlockFramer::InsertBlock(const Block& block) {
|
||||
RTC_DCHECK_EQ(num_bands_, block.NumBands());
|
||||
RTC_DCHECK_EQ(num_channels_, block.NumChannels());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_EQ(0, buffer_[band][channel].size());
|
||||
|
||||
buffer_[band][channel].insert(buffer_[band][channel].begin(),
|
||||
block.begin(band, channel),
|
||||
block.end(band, channel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BlockFramer::InsertBlockAndExtractSubFrame(
|
||||
const Block& block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame) {
|
||||
RTC_DCHECK(sub_frame);
|
||||
RTC_DCHECK_EQ(num_bands_, block.NumBands());
|
||||
RTC_DCHECK_EQ(num_channels_, block.NumChannels());
|
||||
RTC_DCHECK_EQ(num_bands_, sub_frame->size());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, (*sub_frame)[0].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_LE(kSubFrameLength,
|
||||
buffer_[band][channel].size() + kBlockSize);
|
||||
RTC_DCHECK_GE(kBlockSize, buffer_[band][channel].size());
|
||||
RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[band][channel].size());
|
||||
|
||||
const int samples_to_frame =
|
||||
kSubFrameLength - buffer_[band][channel].size();
|
||||
std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
|
||||
(*sub_frame)[band][channel].begin());
|
||||
std::copy(
|
||||
block.begin(band, channel),
|
||||
block.begin(band, channel) + samples_to_frame,
|
||||
(*sub_frame)[band][channel].begin() + buffer_[band][channel].size());
|
||||
buffer_[band][channel].clear();
|
||||
buffer_[band][channel].insert(
|
||||
buffer_[band][channel].begin(),
|
||||
block.begin(band, channel) + samples_to_frame,
|
||||
block.end(band, channel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
49
VocieProcess/modules/audio_processing/aec3/block_framer.h
Normal file
49
VocieProcess/modules/audio_processing/aec3/block_framer.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for producing frames consisting of 2 subframes of 80 samples each
|
||||
// from 64 sample blocks. The class is designed to work together with the
|
||||
// FrameBlocker class which performs the reverse conversion. Used together with
|
||||
// that, this class produces output frames are the same rate as frames are
|
||||
// received by the FrameBlocker class. Note that the internal buffers will
|
||||
// overrun if any other rate of packets insertion is used.
|
||||
class BlockFramer {
|
||||
public:
|
||||
BlockFramer(size_t num_bands, size_t num_channels);
|
||||
~BlockFramer();
|
||||
BlockFramer(const BlockFramer&) = delete;
|
||||
BlockFramer& operator=(const BlockFramer&) = delete;
|
||||
|
||||
// Adds a 64 sample block into the data that will form the next output frame.
|
||||
void InsertBlock(const Block& block);
|
||||
// Adds a 64 sample block and extracts an 80 sample subframe.
|
||||
void InsertBlockAndExtractSubFrame(
|
||||
const Block& block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame);
|
||||
|
||||
private:
|
||||
const size_t num_bands_;
|
||||
const size_t num_channels_;
|
||||
std::vector<std::vector<std::vector<float>>> buffer_;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
|
290
VocieProcess/modules/audio_processing/aec3/block_processor.cc
Normal file
290
VocieProcess/modules/audio_processing/aec3/block_processor.cc
Normal file
@ -0,0 +1,290 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/block_processor.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block_processor_metrics.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/echo_remover.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
enum class BlockProcessorApiCall { kCapture, kRender };
|
||||
|
||||
class BlockProcessorImpl final : public BlockProcessor {
|
||||
public:
|
||||
BlockProcessorImpl(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels,
|
||||
std::unique_ptr<RenderDelayBuffer> render_buffer,
|
||||
std::unique_ptr<RenderDelayController> delay_controller,
|
||||
std::unique_ptr<EchoRemover> echo_remover);
|
||||
|
||||
BlockProcessorImpl() = delete;
|
||||
|
||||
~BlockProcessorImpl() override;
|
||||
|
||||
void ProcessCapture(bool echo_path_gain_change,
|
||||
bool capture_signal_saturation,
|
||||
Block* linear_output,
|
||||
Block* capture_block) override;
|
||||
|
||||
void BufferRender(const Block& block) override;
|
||||
|
||||
void UpdateEchoLeakageStatus(bool leakage_detected) override;
|
||||
|
||||
void GetMetrics(EchoControl::Metrics* metrics) const override;
|
||||
|
||||
void SetAudioBufferDelay(int delay_ms) override;
|
||||
void SetCaptureOutputUsage(bool capture_output_used) override;
|
||||
|
||||
private:
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const EchoCanceller3Config config_;
|
||||
bool capture_properly_started_ = false;
|
||||
bool render_properly_started_ = false;
|
||||
const size_t sample_rate_hz_;
|
||||
std::unique_ptr<RenderDelayBuffer> render_buffer_;
|
||||
std::unique_ptr<RenderDelayController> delay_controller_;
|
||||
std::unique_ptr<EchoRemover> echo_remover_;
|
||||
BlockProcessorMetrics metrics_;
|
||||
RenderDelayBuffer::BufferingEvent render_event_;
|
||||
size_t capture_call_counter_ = 0;
|
||||
absl::optional<DelayEstimate> estimated_delay_;
|
||||
};
|
||||
|
||||
std::atomic<int> BlockProcessorImpl::instance_count_(0);
|
||||
|
||||
BlockProcessorImpl::BlockProcessorImpl(
|
||||
const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels,
|
||||
std::unique_ptr<RenderDelayBuffer> render_buffer,
|
||||
std::unique_ptr<RenderDelayController> delay_controller,
|
||||
std::unique_ptr<EchoRemover> echo_remover)
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
config_(config),
|
||||
sample_rate_hz_(sample_rate_hz),
|
||||
render_buffer_(std::move(render_buffer)),
|
||||
delay_controller_(std::move(delay_controller)),
|
||||
echo_remover_(std::move(echo_remover)),
|
||||
render_event_(RenderDelayBuffer::BufferingEvent::kNone) {
|
||||
RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
|
||||
}
|
||||
|
||||
BlockProcessorImpl::~BlockProcessorImpl() = default;
|
||||
|
||||
// Runs one capture-side processing step: aligns the render buffer with the
// capture signal (via the internal delay controller or an externally supplied
// delay) and then removes the echo from `capture_block` in place. Skips all
// processing until the first render block has been buffered.
void BlockProcessorImpl::ProcessCapture(bool echo_path_gain_change,
                                        bool capture_signal_saturation,
                                        Block* linear_output,
                                        Block* capture_block) {
  RTC_DCHECK(capture_block);
  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->NumBands());

  capture_call_counter_++;

  data_dumper_->DumpRaw("aec3_processblock_call_order",
                        static_cast<int>(BlockProcessorApiCall::kCapture));
  data_dumper_->DumpWav("aec3_processblock_capture_input",
                        capture_block->View(/*band=*/0, /*channel=*/0), 16000,
                        1);

  if (render_properly_started_) {
    if (!capture_properly_started_) {
      // First capture block after render started: restart buffering and delay
      // estimation from a clean state.
      capture_properly_started_ = true;
      render_buffer_->Reset();
      if (delay_controller_)
        delay_controller_->Reset(true);
    }
  } else {
    // If no render data has yet arrived, do not process the capture signal.
    render_buffer_->HandleSkippedCaptureProcessing();
    return;
  }

  EchoPathVariability echo_path_variability(
      echo_path_gain_change, EchoPathVariability::DelayAdjustment::kNone,
      false);

  // A render overrun recorded by BufferRender means buffered render data was
  // dropped; flag a buffer flush and re-estimate the delay from scratch.
  if (render_event_ == RenderDelayBuffer::BufferingEvent::kRenderOverrun &&
      render_properly_started_) {
    echo_path_variability.delay_change =
        EchoPathVariability::DelayAdjustment::kBufferFlush;
    if (delay_controller_)
      delay_controller_->Reset(true);
    RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block "
                        << capture_call_counter_;
  }
  render_event_ = RenderDelayBuffer::BufferingEvent::kNone;

  // Update the render buffers with any newly arrived render blocks and prepare
  // the render buffers for reading the render data corresponding to the
  // current capture block.
  RenderDelayBuffer::BufferingEvent buffer_event =
      render_buffer_->PrepareCaptureProcessing();
  // Reset the delay controller at render buffer underrun.
  if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) {
    if (delay_controller_)
      delay_controller_->Reset(false);
  }

  data_dumper_->DumpWav("aec3_processblock_capture_input2",
                        capture_block->View(/*band=*/0, /*channel=*/0), 16000,
                        1);

  bool has_delay_estimator = !config_.delay.use_external_delay_estimator;
  if (has_delay_estimator) {
    RTC_DCHECK(delay_controller_);
    // Compute and apply the render delay required to achieve proper signal
    // alignment.
    estimated_delay_ = delay_controller_->GetDelay(
        render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
        *capture_block);

    if (estimated_delay_) {
      bool delay_change =
          render_buffer_->AlignFromDelay(estimated_delay_->delay);
      if (delay_change) {
        rtc::LoggingSeverity log_level =
            config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
                                                       : rtc::LS_INFO;
        RTC_LOG_V(log_level) << "Delay changed to " << estimated_delay_->delay
                             << " at block " << capture_call_counter_;
        echo_path_variability.delay_change =
            EchoPathVariability::DelayAdjustment::kNewDetectedDelay;
      }
    }

    echo_path_variability.clock_drift = delay_controller_->HasClockdrift();

  } else {
    // External delay estimation: the delay was supplied via
    // SetAudioBufferDelay and is applied by the render buffer itself.
    render_buffer_->AlignFromExternalDelay();
  }

  // Remove the echo from the capture signal. With an external delay estimator,
  // this is skipped until a delay has actually been received.
  if (has_delay_estimator || render_buffer_->HasReceivedBufferDelay()) {
    echo_remover_->ProcessCapture(
        echo_path_variability, capture_signal_saturation, estimated_delay_,
        render_buffer_->GetRenderBuffer(), linear_output, capture_block);
  }

  // Update the metrics.
  metrics_.UpdateCapture(false);
}
|
||||
|
||||
// Inserts one render (far-end) block into the render delay buffer. Any
// buffering event (e.g. overrun) is remembered in `render_event_` and handled
// by the next ProcessCapture call.
void BlockProcessorImpl::BufferRender(const Block& block) {
  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.NumBands());
  data_dumper_->DumpRaw("aec3_processblock_call_order",
                        static_cast<int>(BlockProcessorApiCall::kRender));
  data_dumper_->DumpWav("aec3_processblock_render_input",
                        block.View(/*band=*/0, /*channel=*/0), 16000, 1);

  render_event_ = render_buffer_->Insert(block);

  // Any event other than kNone counts as a render-side buffering anomaly.
  metrics_.UpdateRender(render_event_ !=
                        RenderDelayBuffer::BufferingEvent::kNone);

  // Marks that capture processing may start (see ProcessCapture).
  render_properly_started_ = true;
  if (delay_controller_)
    delay_controller_->LogRenderCall();
}
|
||||
|
||||
// Forwards an externally detected echo-leakage status to the echo remover.
void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) {
  echo_remover_->UpdateEchoLeakageStatus(leakage_detected);
}
|
||||
|
||||
// Fills `metrics` with the echo remover's statistics and the current render
// buffer delay, converted from blocks to milliseconds (one block is 4 ms).
void BlockProcessorImpl::GetMetrics(EchoControl::Metrics* metrics) const {
  echo_remover_->GetMetrics(metrics);
  constexpr int kBlockSizeMs = 4;
  absl::optional<size_t> current_delay = render_buffer_->Delay();
  if (current_delay) {
    metrics->delay_ms = static_cast<int>(*current_delay) * kBlockSizeMs;
  } else {
    metrics->delay_ms = 0;
  }
}
|
||||
|
||||
// Forwards an externally reported audio buffer delay to the render buffer;
// used when config_.delay.use_external_delay_estimator is set.
void BlockProcessorImpl::SetAudioBufferDelay(int delay_ms) {
  render_buffer_->SetAudioBufferDelay(delay_ms);
}
|
||||
|
||||
// Informs the echo remover whether the capture output is consumed, allowing it
// to skip work when the output is unused (e.g. the endpoint is muted).
void BlockProcessorImpl::SetCaptureOutputUsage(bool capture_output_used) {
  echo_remover_->SetCaptureOutputUsage(capture_output_used);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates a fully self-contained block processor. Builds the render delay
// buffer and delegates to the overload taking a render buffer, which in turn
// constructs the delay controller and echo remover. Delegating avoids the
// duplicated construction logic previously present in this overload.
// The caller takes ownership of the returned pointer.
BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config,
                                       int sample_rate_hz,
                                       size_t num_render_channels,
                                       size_t num_capture_channels) {
  std::unique_ptr<RenderDelayBuffer> render_buffer(
      RenderDelayBuffer::Create(config, sample_rate_hz, num_render_channels));
  return Create(config, sample_rate_hz, num_render_channels,
                num_capture_channels, std::move(render_buffer));
}
|
||||
|
||||
// Creates a block processor with an injected render delay buffer (test
// seam). The delay controller is only created when the internal delay
// estimator is in use; with an external estimator it stays null.
// The caller takes ownership of the returned pointer.
BlockProcessor* BlockProcessor::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer) {
  std::unique_ptr<RenderDelayController> delay_controller;
  if (!config.delay.use_external_delay_estimator) {
    delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz,
                                                         num_capture_channels));
  }
  std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create(
      config, sample_rate_hz, num_render_channels, num_capture_channels));
  return Create(config, sample_rate_hz, num_render_channels,
                num_capture_channels, std::move(render_buffer),
                std::move(delay_controller), std::move(echo_remover));
}
|
||||
|
||||
// Creates a block processor with all sub-components injected (test seam);
// simply forwards everything to the BlockProcessorImpl constructor.
// The caller takes ownership of the returned pointer.
BlockProcessor* BlockProcessor::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer,
    std::unique_ptr<RenderDelayController> delay_controller,
    std::unique_ptr<EchoRemover> echo_remover) {
  return new BlockProcessorImpl(config, sample_rate_hz, num_render_channels,
                                num_capture_channels, std::move(render_buffer),
                                std::move(delay_controller),
                                std::move(echo_remover));
}
|
||||
|
||||
} // namespace webrtc
|
81
VocieProcess/modules/audio_processing/aec3/block_processor.h
Normal file
81
VocieProcess/modules/audio_processing/aec3/block_processor.h
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/echo_remover.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for performing echo cancellation on 64 sample blocks of audio data.
// Abstract interface; concrete instances are obtained via the Create
// factories, and the caller owns the returned raw pointer.
class BlockProcessor {
 public:
  static BlockProcessor* Create(const EchoCanceller3Config& config,
                                int sample_rate_hz,
                                size_t num_render_channels,
                                size_t num_capture_channels);
  // Only used for testing purposes.
  static BlockProcessor* Create(
      const EchoCanceller3Config& config,
      int sample_rate_hz,
      size_t num_render_channels,
      size_t num_capture_channels,
      std::unique_ptr<RenderDelayBuffer> render_buffer);
  // Only used for testing purposes: allows injecting every sub-component.
  static BlockProcessor* Create(
      const EchoCanceller3Config& config,
      int sample_rate_hz,
      size_t num_render_channels,
      size_t num_capture_channels,
      std::unique_ptr<RenderDelayBuffer> render_buffer,
      std::unique_ptr<RenderDelayController> delay_controller,
      std::unique_ptr<EchoRemover> echo_remover);

  virtual ~BlockProcessor() = default;

  // Get current metrics.
  virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;

  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;

  // Processes a block of capture data.
  virtual void ProcessCapture(bool echo_path_gain_change,
                              bool capture_signal_saturation,
                              Block* linear_output,
                              Block* capture_block) = 0;

  // Buffers a block of render data supplied by a FrameBlocker object.
  virtual void BufferRender(const Block& render_block) = 0;

  // Reports whether echo leakage has been detected in the echo canceller
  // output.
  virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;

  // Specifies whether the capture output will be used. The purpose of this is
  // to allow the block processor to deactivate some of the processing when the
  // resulting output is anyway not used, for instance when the endpoint is
  // muted.
  virtual void SetCaptureOutputUsage(bool capture_output_used) = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
|
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/block_processor_metrics.h"
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Severity buckets for render buffer underruns reported via UMA histograms.
// The values are declared in ascending severity; kNumCategories is the
// histogram boundary, not a reportable value.
enum class RenderUnderrunCategory {
  kNone,
  kFew,
  kSeveral,
  kMany,
  kConstant,
  kNumCategories
};

// Severity buckets for render buffer overruns; same layout as the underrun
// enum above.
enum class RenderOverrunCategory {
  kNone,
  kFew,
  kSeveral,
  kMany,
  kConstant,
  kNumCategories
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void BlockProcessorMetrics::UpdateCapture(bool underrun) {
|
||||
++capture_block_counter_;
|
||||
if (underrun) {
|
||||
++render_buffer_underruns_;
|
||||
}
|
||||
|
||||
if (capture_block_counter_ == kMetricsReportingIntervalBlocks) {
|
||||
metrics_reported_ = true;
|
||||
|
||||
RenderUnderrunCategory underrun_category;
|
||||
if (render_buffer_underruns_ == 0) {
|
||||
underrun_category = RenderUnderrunCategory::kNone;
|
||||
} else if (render_buffer_underruns_ > (capture_block_counter_ >> 1)) {
|
||||
underrun_category = RenderUnderrunCategory::kConstant;
|
||||
} else if (render_buffer_underruns_ > 100) {
|
||||
underrun_category = RenderUnderrunCategory::kMany;
|
||||
} else if (render_buffer_underruns_ > 10) {
|
||||
underrun_category = RenderUnderrunCategory::kSeveral;
|
||||
} else {
|
||||
underrun_category = RenderUnderrunCategory::kFew;
|
||||
}
|
||||
RTC_HISTOGRAM_ENUMERATION(
|
||||
"WebRTC.Audio.EchoCanceller.RenderUnderruns",
|
||||
static_cast<int>(underrun_category),
|
||||
static_cast<int>(RenderUnderrunCategory::kNumCategories));
|
||||
|
||||
RenderOverrunCategory overrun_category;
|
||||
if (render_buffer_overruns_ == 0) {
|
||||
overrun_category = RenderOverrunCategory::kNone;
|
||||
} else if (render_buffer_overruns_ > (buffer_render_calls_ >> 1)) {
|
||||
overrun_category = RenderOverrunCategory::kConstant;
|
||||
} else if (render_buffer_overruns_ > 100) {
|
||||
overrun_category = RenderOverrunCategory::kMany;
|
||||
} else if (render_buffer_overruns_ > 10) {
|
||||
overrun_category = RenderOverrunCategory::kSeveral;
|
||||
} else {
|
||||
overrun_category = RenderOverrunCategory::kFew;
|
||||
}
|
||||
RTC_HISTOGRAM_ENUMERATION(
|
||||
"WebRTC.Audio.EchoCanceller.RenderOverruns",
|
||||
static_cast<int>(overrun_category),
|
||||
static_cast<int>(RenderOverrunCategory::kNumCategories));
|
||||
|
||||
ResetMetrics();
|
||||
capture_block_counter_ = 0;
|
||||
} else {
|
||||
metrics_reported_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void BlockProcessorMetrics::UpdateRender(bool overrun) {
|
||||
++buffer_render_calls_;
|
||||
if (overrun) {
|
||||
++render_buffer_overruns_;
|
||||
}
|
||||
}
|
||||
|
||||
// Clears all interval counters ahead of the next metrics reporting interval.
// Note: capture_block_counter_ is reset separately by the caller.
void BlockProcessorMetrics::ResetMetrics() {
  buffer_render_calls_ = 0;
  render_buffer_overruns_ = 0;
  render_buffer_underruns_ = 0;
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Handles the reporting of metrics for the block_processor.
class BlockProcessorMetrics {
 public:
  BlockProcessorMetrics() = default;

  BlockProcessorMetrics(const BlockProcessorMetrics&) = delete;
  BlockProcessorMetrics& operator=(const BlockProcessorMetrics&) = delete;

  // Updates the metric with new capture data.
  void UpdateCapture(bool underrun);

  // Updates the metric with new render data.
  void UpdateRender(bool overrun);

  // Returns true if the metrics have just been reported, otherwise false.
  bool MetricsReported() { return metrics_reported_; }

 private:
  // Resets the metrics.
  void ResetMetrics();

  // Number of capture blocks seen in the current reporting interval.
  int capture_block_counter_ = 0;
  // True only for the call on which the histograms were emitted.
  bool metrics_reported_ = false;
  // Render buffer underruns observed during the current interval.
  int render_buffer_underruns_ = 0;
  // Render buffer overruns observed during the current interval.
  int render_buffer_overruns_ = 0;
  // Number of UpdateRender calls during the current interval.
  int buffer_render_calls_ = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/clockdrift_detector.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Starts with no detected drift and an empty (all-zero) delay history.
ClockdriftDetector::ClockdriftDetector()
    : level_(Level::kNone), stability_counter_(0) {
  delay_history_.fill(0);
}

ClockdriftDetector::~ClockdriftDetector() = default;
|
||||
|
||||
// Feeds one new delay estimate into the detector. Drift is recognized when
// the last four estimates form a monotone ramp of unit steps (allowing the
// two middle samples to be swapped); a full ramp verifies drift, a partial
// one only marks it probable.
void ClockdriftDetector::Update(int delay_estimate) {
  if (delay_estimate == delay_history_[0]) {
    // Reset clockdrift level if delay estimate is stable for 7500 blocks (30
    // seconds).
    if (++stability_counter_ > 7500)
      level_ = Level::kNone;
    return;
  }

  stability_counter_ = 0;
  // Differences of the three most recent estimates relative to the new one.
  const int d1 = delay_history_[0] - delay_estimate;
  const int d2 = delay_history_[1] - delay_estimate;
  const int d3 = delay_history_[2] - delay_estimate;

  // Patterns recognized as positive clockdrift:
  // [x-3], x-2, x-1, x.
  // [x-3], x-1, x-2, x.
  const bool probable_drift_up =
      (d1 == -1 && d2 == -2) || (d1 == -2 && d2 == -1);
  const bool drift_up = probable_drift_up && d3 == -3;

  // Patterns recognized as negative clockdrift:
  // [x+3], x+2, x+1, x.
  // [x+3], x+1, x+2, x.
  const bool probable_drift_down = (d1 == 1 && d2 == 2) || (d1 == 2 && d2 == 1);
  const bool drift_down = probable_drift_down && d3 == 3;

  // Set clockdrift level. A kProbable level never downgrades an existing
  // kVerified level.
  if (drift_up || drift_down) {
    level_ = Level::kVerified;
  } else if ((probable_drift_up || probable_drift_down) &&
             level_ == Level::kNone) {
    level_ = Level::kProbable;
  }

  // Shift delay history one step.
  delay_history_[2] = delay_history_[1];
  delay_history_[1] = delay_history_[0];
  delay_history_[0] = delay_estimate;
}
|
||||
} // namespace webrtc
|
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
struct DownsampledRenderBuffer;
|
||||
struct EchoCanceller3Config;
|
||||
|
||||
// Detects clockdrift by analyzing the estimated delay.
class ClockdriftDetector {
 public:
  // Drift confidence, in increasing order; kNumCategories is a histogram
  // boundary, not a reportable level.
  enum class Level { kNone, kProbable, kVerified, kNumCategories };
  ClockdriftDetector();
  ~ClockdriftDetector();
  // Feeds one new delay estimate (in blocks) into the detector.
  void Update(int delay_estimate);
  // Returns the currently detected drift level.
  Level ClockdriftLevel() const { return level_; }

 private:
  // The three most recent distinct delay estimates, newest first.
  std::array<int, 3> delay_history_;
  Level level_;
  // Number of consecutive Update calls with an unchanged delay estimate.
  size_t stability_counter_;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
|
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Applies `config` immediately and caches the reciprocal of the configured
// transition length so per-block config interpolation avoids a division.
CoarseFilterUpdateGain::CoarseFilterUpdateGain(
    const EchoCanceller3Config::Filter::CoarseConfiguration& config,
    size_t config_change_duration_blocks)
    : config_change_duration_blocks_(
          static_cast<int>(config_change_duration_blocks)) {
  SetConfig(config, true);
  RTC_DCHECK_LT(0, config_change_duration_blocks_);
  one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_;
}
|
||||
|
||||
// Restarts the excitation/warm-up gating in Compute after a known echo path
// change, so the filter is not updated on stale statistics.
void CoarseFilterUpdateGain::HandleEchoPathChange() {
  poor_signal_excitation_counter_ = 0;
  call_counter_ = 0;
}
|
||||
|
||||
// Computes the NLMS-style update gain G for the coarse adaptive filter from
// the render power spectrum and the coarse-filter error spectrum E_coarse.
// Outputs a zero gain (no filter update) while the render signal is poorly
// excited, the capture is saturated, or fewer than `size_partitions` blocks
// have been processed since the last echo path change.
void CoarseFilterUpdateGain::Compute(
    const std::array<float, kFftLengthBy2Plus1>& render_power,
    const RenderSignalAnalyzer& render_signal_analyzer,
    const FftData& E_coarse,
    size_t size_partitions,
    bool saturated_capture_signal,
    FftData* G) {
  RTC_DCHECK(G);
  ++call_counter_;

  UpdateCurrentConfig();

  if (render_signal_analyzer.PoorSignalExcitation()) {
    poor_signal_excitation_counter_ = 0;
  }

  // Do not update the filter if the render is not sufficiently excited.
  if (++poor_signal_excitation_counter_ < size_partitions ||
      saturated_capture_signal || call_counter_ <= size_partitions) {
    G->re.fill(0.f);
    G->im.fill(0.f);
    return;
  }

  // Compute mu. Per bin: mu = rate / X2 when the render power exceeds the
  // noise gate, otherwise 0 (no adaptation in that bin).
  std::array<float, kFftLengthBy2Plus1> mu;
  const auto& X2 = render_power;
  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
    if (X2[k] > current_config_.noise_gate) {
      mu[k] = current_config_.rate / X2[k];
    } else {
      mu[k] = 0.f;
    }
  }

  // Avoid updating the filter close to narrow bands in the render signals.
  render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);

  // G = mu * E_coarse; the normalization by the render power X2 is already
  // folded into mu above.
  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
    G->re[k] = mu[k] * E_coarse.re[k];
    G->im[k] = mu[k] * E_coarse.im[k];
  }
}
|
||||
|
||||
// Advances the per-block linear interpolation from the previous config
// towards the target config; once the transition counter reaches zero the
// target becomes the current config. No-op when no transition is in progress.
void CoarseFilterUpdateGain::UpdateCurrentConfig() {
  RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_);
  if (config_change_counter_ > 0) {
    if (--config_change_counter_ > 0) {
      // Linear blend: from_weight is the remaining fraction of the
      // transition, so it decays from ~1 to 0 over the transition.
      auto average = [](float from, float to, float from_weight) {
        return from * from_weight + to * (1.f - from_weight);
      };

      float change_factor =
          config_change_counter_ * one_by_config_change_duration_blocks_;

      current_config_.rate =
          average(old_target_config_.rate, target_config_.rate, change_factor);
      current_config_.noise_gate =
          average(old_target_config_.noise_gate, target_config_.noise_gate,
                  change_factor);
    } else {
      // Transition finished: snap everything to the target.
      current_config_ = old_target_config_ = target_config_;
    }
  }
  RTC_DCHECK_LE(0, config_change_counter_);
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Provides functionality for computing the fixed gain for the coarse filter.
class CoarseFilterUpdateGain {
 public:
  explicit CoarseFilterUpdateGain(
      const EchoCanceller3Config::Filter::CoarseConfiguration& config,
      size_t config_change_duration_blocks);

  // Takes action in the case of a known echo path change.
  void HandleEchoPathChange();

  // Computes the gain.
  void Compute(const std::array<float, kFftLengthBy2Plus1>& render_power,
               const RenderSignalAnalyzer& render_signal_analyzer,
               const FftData& E_coarse,
               size_t size_partitions,
               bool saturated_capture_signal,
               FftData* G);

  // Sets a new config. With `immediate_effect` the config is applied right
  // away; otherwise it is reached gradually over
  // `config_change_duration_blocks` calls to Compute.
  void SetConfig(
      const EchoCanceller3Config::Filter::CoarseConfiguration& config,
      bool immediate_effect) {
    if (immediate_effect) {
      old_target_config_ = current_config_ = target_config_ = config;
      config_change_counter_ = 0;
    } else {
      old_target_config_ = current_config_;
      target_config_ = config;
      config_change_counter_ = config_change_duration_blocks_;
    }
  }

 private:
  // Config in effect for the current block (interpolated during transitions).
  EchoCanceller3Config::Filter::CoarseConfiguration current_config_;
  // Config being transitioned towards.
  EchoCanceller3Config::Filter::CoarseConfiguration target_config_;
  // Config in effect when the current transition started.
  EchoCanceller3Config::Filter::CoarseConfiguration old_target_config_;
  const int config_change_duration_blocks_;
  float one_by_config_change_duration_blocks_;
  // TODO(peah): Check whether this counter should instead be initialized to a
  // large value.
  size_t poor_signal_excitation_counter_ = 0;
  size_t call_counter_ = 0;
  // Remaining blocks of the current config transition; 0 when idle.
  int config_change_counter_ = 0;

  void UpdateCurrentConfig();
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
|
@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
|
||||
|
||||
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <numeric>
|
||||
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "modules/audio_processing/aec3/vector_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Computes the noise floor value that matches a WGN input of
// noise_floor_dbfs.
float GetNoiseFloorFactor(float noise_floor_dbfs) {
  // kdBfsNormalization = 20.f*log10(32768.f).
  constexpr float kdBfsNormalization = 90.30899869919436f;
  // Convert the dBFS level to a linear power factor: 10^(dB/10).
  const float exponent = (kdBfsNormalization + noise_floor_dbfs) * 0.1f;
  return 64.f * powf(10.f, exponent);
}
|
||||
|
||||
// Table of sqrt(2) * sin(2*pi*i/32).
constexpr float kSqrt2Sin[32] = {
    +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
    +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
    +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
    +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
    -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
    -0.5411961f, -0.2758994f};

// Generates random-phase comfort noise from the noise spectrum estimate N2:
// the lower band is spectrally shaped by sqrt(N2), the upper band uses a
// single level averaged over the top half of the spectrum. `seed` is the
// LCG state and is advanced in place.
void GenerateComfortNoise(Aec3Optimization optimization,
                          const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise) {
  FftData* N_low = lower_band_noise;
  FftData* N_high = upper_band_noise;

  // Compute square root spectrum.
  std::array<float, kFftLengthBy2Plus1> N;
  std::copy(N2.begin(), N2.end(), N.begin());
  aec3::VectorMath(optimization).Sqrt(N);

  // Compute the noise level for the upper bands.
  constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
  constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
  const float high_band_noise_level =
      std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
      kOneByNumBands;

  // The analysis and synthesis windowing cause loss of power when
  // cross-fading the noise where frames are completely uncorrelated
  // (generated with random phase), hence the factor sqrt(2).
  // This is not the case for the speech signal where the input is overlapping
  // (strong correlation).
  N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
      N_high->re[kFftLengthBy2] = 0.f;
  for (size_t k = 1; k < kFftLengthBy2; k++) {
    constexpr int kIndexMask = 32 - 1;
    // Generate a random 31-bit integer.
    seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
    // Convert to a 5-bit index.
    int i = seed[0] >> 26;

    // x = sqrt(2) * sin(a).
    const float x = kSqrt2Sin[i];
    // y = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2); the +8 offset is a
    // quarter period of the 32-entry sine table.
    const float y = kSqrt2Sin[(i + 8) & kIndexMask];

    // Form low-frequency noise via spectral shaping.
    N_low->re[k] = N[k] * x;
    N_low->im[k] = N[k] * y;

    // Form the high-frequency noise via simple levelling.
    N_high->re[k] = high_band_noise_level * x;
    N_high->im[k] = high_band_noise_level * y;
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Sets up per-channel noise spectrum estimates. N2_ starts at a high value
// (1e6 per bin) and adapts downwards; N2_initial_ provides a separate
// fast-start estimate used during the first ~1000 blocks (see Compute).
ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
                                             Aec3Optimization optimization,
                                             size_t num_capture_channels)
    : optimization_(optimization),
      seed_(42),  // Fixed seed: the noise is deterministic per instance.
      num_capture_channels_(num_capture_channels),
      noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
      N2_initial_(
          std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
              num_capture_channels_)),
      Y2_smoothed_(num_capture_channels_),
      N2_(num_capture_channels_) {
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    (*N2_initial_)[ch].fill(0.f);
    Y2_smoothed_[ch].fill(0.f);
    N2_[ch].fill(1.0e6f);
  }
}

ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;
|
||||
|
||||
// Updates the per-channel noise spectrum estimates from the capture spectrum
// (skipped while the capture is saturated) and produces one block of lower-
// and upper-band comfort noise per channel.
void ComfortNoiseGenerator::Compute(
    bool saturated_capture,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        capture_spectrum,
    rtc::ArrayView<FftData> lower_band_noise,
    rtc::ArrayView<FftData> upper_band_noise) {
  const auto& Y2 = capture_spectrum;

  if (!saturated_capture) {
    // Smooth Y2 (first-order low-pass with coefficient 0.1).
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
                     Y2[ch].begin(), Y2_smoothed_[ch].begin(),
                     [](float a, float b) { return a + 0.1f * (b - a); });
    }

    if (N2_counter_ > 50) {
      // Update N2 from Y2_smoothed: track decreases quickly, and let the
      // estimate drift slowly upwards (factor 1.0002) so it can recover.
      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
        std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
                       N2_[ch].begin(), [](float a, float b) {
                         return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
                                      : a * 1.0002f;
                       });
      }
    }

    if (N2_initial_) {
      // The fast-start estimate is dropped after 1000 unsaturated blocks.
      if (++N2_counter_ == 1000) {
        N2_initial_.reset();
      } else {
        // Compute the N2_initial from N2.
        for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
          std::transform(N2_[ch].begin(), N2_[ch].end(),
                         (*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
                         [](float a, float b) {
                           return a > b ? b + 0.001f * (a - b) : a;
                         });
        }
      }
    }

    // Clamp both estimates to the configured noise floor.
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      for (auto& n : N2_[ch]) {
        n = std::max(n, noise_floor_);
      }
      if (N2_initial_) {
        for (auto& n : (*N2_initial_)[ch]) {
          n = std::max(n, noise_floor_);
        }
      }
    }
  }

  // Choose N2 estimate to use.
  const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;

  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
                         &upper_band_noise[ch]);
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)

// SSE2-optimized variant; only declared when targeting the x86 family.
void EstimateComfortNoise_SSE2(const std::array<float, kFftLengthBy2Plus1>& N2,
                               uint32_t* seed,
                               FftData* lower_band_noise,
                               FftData* upper_band_noise);
#endif
// Produces comfort noise for the lower and upper bands from the noise
// spectrum estimate `N2`, updating the pseudo-random state in `seed`.
void EstimateComfortNoise(const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise);
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
// Generates the comfort noise.
|
||||
class ComfortNoiseGenerator {
|
||||
public:
|
||||
ComfortNoiseGenerator(const EchoCanceller3Config& config,
|
||||
Aec3Optimization optimization,
|
||||
size_t num_capture_channels);
|
||||
ComfortNoiseGenerator() = delete;
|
||||
~ComfortNoiseGenerator();
|
||||
ComfortNoiseGenerator(const ComfortNoiseGenerator&) = delete;
|
||||
|
||||
// Computes the comfort noise.
|
||||
void Compute(bool saturated_capture,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
capture_spectrum,
|
||||
rtc::ArrayView<FftData> lower_band_noise,
|
||||
rtc::ArrayView<FftData> upper_band_noise);
|
||||
|
||||
// Returns the estimate of the background noise spectrum.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> NoiseSpectrum()
|
||||
const {
|
||||
return N2_;
|
||||
}
|
||||
|
||||
private:
|
||||
const Aec3Optimization optimization_;
|
||||
uint32_t seed_;
|
||||
const size_t num_capture_channels_;
|
||||
const float noise_floor_;
|
||||
std::unique_ptr<std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
||||
N2_initial_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_smoothed_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> N2_;
|
||||
int N2_counter_ = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
|
@ -0,0 +1,71 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/config_selector.h"
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Validates that the mono and the multichannel configs have compatible fields.
|
||||
bool CompatibleConfigs(const EchoCanceller3Config& mono_config,
|
||||
const EchoCanceller3Config& multichannel_config) {
|
||||
if (mono_config.delay.fixed_capture_delay_samples !=
|
||||
multichannel_config.delay.fixed_capture_delay_samples) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.filter.export_linear_aec_output !=
|
||||
multichannel_config.filter.export_linear_aec_output) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.filter.high_pass_filter_echo_reference !=
|
||||
multichannel_config.filter.high_pass_filter_echo_reference) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.multi_channel.detect_stereo_content !=
|
||||
multichannel_config.multi_channel.detect_stereo_content) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds !=
|
||||
multichannel_config.multi_channel
|
||||
.stereo_detection_timeout_threshold_seconds) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores both configs, sanity-checks their compatibility, and picks the
// initial active config.
ConfigSelector::ConfigSelector(
    const EchoCanceller3Config& config,
    const absl::optional<EchoCanceller3Config>& multichannel_config,
    int num_render_input_channels)
    : config_(config), multichannel_config_(multichannel_config) {
  if (multichannel_config_.has_value()) {
    RTC_DCHECK(CompatibleConfigs(config_, *multichannel_config_));
  }

  // Start in multichannel mode only when stereo-content detection is disabled
  // and the render input actually carries more than one channel.
  const bool multichannel_active =
      !config_.multi_channel.detect_stereo_content &&
      num_render_input_channels > 1;
  Update(multichannel_active);

  RTC_DCHECK(active_config_);
}
|
||||
|
||||
void ConfigSelector::Update(bool multichannel_content) {
|
||||
if (multichannel_content && multichannel_config_.has_value()) {
|
||||
active_config_ = &(*multichannel_config_);
|
||||
} else {
|
||||
active_config_ = &config_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
41
VocieProcess/modules/audio_processing/aec3/config_selector.h
Normal file
41
VocieProcess/modules/audio_processing/aec3/config_selector.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Selects the config to use.
class ConfigSelector {
 public:
  // `multichannel_config`, when provided, must have fields compatible with
  // `config` (DCHECKed in the constructor).
  ConfigSelector(
      const EchoCanceller3Config& config,
      const absl::optional<EchoCanceller3Config>& multichannel_config,
      int num_render_input_channels);

  // Updates the config selection based on the detection of multichannel
  // content.
  void Update(bool multichannel_content);

  // Returns the currently selected config.
  const EchoCanceller3Config& active_config() const { return *active_config_; }

 private:
  // Config used when no multichannel content is active.
  const EchoCanceller3Config config_;
  // Config used for multichannel content, if one was supplied.
  const absl::optional<EchoCanceller3Config> multichannel_config_;
  // Points at either config_ or *multichannel_config_; set by Update().
  const EchoCanceller3Config* active_config_ = nullptr;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
91
VocieProcess/modules/audio_processing/aec3/decimator.cc
Normal file
91
VocieProcess/modules/audio_processing/aec3/decimator.cc
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/decimator.h"
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Anti-aliasing low-pass filter for the 2x downsampling path.  Coefficients
// generated with scipy:
// signal.butter(2, 3400/8000.0, 'lowpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS2() {
  // Three identical cascaded biquad sections.
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}};
}
|
||||
|
||||
// Anti-aliasing low-pass filter for the 4x downsampling path.  Coefficients
// generated with scipy:
// signal.ellip(6, 1, 40, 1800/8000, btype='lowpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS4() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{-0.08873842f, 0.99605496f}, {0.75916227f, 0.23841065f}, 0.26250696827f},
      {{0.62273832f, 0.78243018f}, {0.74892112f, 0.5410152f}, 0.26250696827f},
      {{0.71107693f, 0.70311421f}, {0.74895534f, 0.63924616f}, 0.26250696827f}};
}
|
||||
|
||||
// Band-pass filter for the 8x downsampling path.  Coefficients generated with
// scipy:
// signal.cheby1(1, 6, [1000/8000, 2000/8000], btype='bandpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetBandPassFilterDS8() {
  // Five identical cascaded biquad sections (last param marks band-pass form).
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}};
}
|
||||
|
||||
// Noise-reduction high-pass filter.  Coefficients generated with scipy:
// signal.butter(2, 1000/8000.0, 'highpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetHighPassFilter() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{1.f, 0.f}, {0.72712179f, 0.21296904f}, 0.7570763753338849f}};
}
|
||||
|
||||
// Empty coefficient list: no biquad sections are applied.
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetPassThroughFilter() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{};
}
|
||||
} // namespace
|
||||
|
||||
Decimator::Decimator(size_t down_sampling_factor)
|
||||
: down_sampling_factor_(down_sampling_factor),
|
||||
anti_aliasing_filter_(down_sampling_factor_ == 4
|
||||
? GetLowPassFilterDS4()
|
||||
: (down_sampling_factor_ == 8
|
||||
? GetBandPassFilterDS8()
|
||||
: GetLowPassFilterDS2())),
|
||||
noise_reduction_filter_(down_sampling_factor_ == 8
|
||||
? GetPassThroughFilter()
|
||||
: GetHighPassFilter()) {
|
||||
RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 ||
|
||||
down_sampling_factor_ == 8);
|
||||
}
|
||||
|
||||
// Filters `in` and writes every down_sampling_factor_-th sample to `out`.
void Decimator::Decimate(rtc::ArrayView<const float> in,
                         rtc::ArrayView<float> out) {
  RTC_DCHECK_EQ(kBlockSize, in.size());
  RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size());
  // Work buffer holding the filtered full-rate signal.
  std::array<float, kBlockSize> filtered;

  // Limit the frequency content of the signal to avoid aliasing.
  anti_aliasing_filter_.Process(in, filtered);

  // Reduce the impact of near-end noise.
  noise_reduction_filter_.Process(filtered);

  // Keep every down_sampling_factor_-th sample.
  size_t src = 0;
  for (float& dst : out) {
    RTC_DCHECK_GT(kBlockSize, src);
    dst = filtered[src];
    src += down_sampling_factor_;
  }
}
|
||||
|
||||
} // namespace webrtc
|
41
VocieProcess/modules/audio_processing/aec3/decimator.h
Normal file
41
VocieProcess/modules/audio_processing/aec3/decimator.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Provides functionality for decimating a signal.
class Decimator {
 public:
  // `down_sampling_factor` must be 2, 4 or 8 (DCHECKed in the constructor).
  explicit Decimator(size_t down_sampling_factor);

  Decimator(const Decimator&) = delete;
  Decimator& operator=(const Decimator&) = delete;

  // Downsamples the signal. `in` must hold kBlockSize samples and `out`
  // kBlockSize / down_sampling_factor samples (both DCHECKed).
  void Decimate(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);

 private:
  const size_t down_sampling_factor_;
  // Applied before decimation to limit frequency content (low-pass, or
  // band-pass for factor 8).
  CascadedBiQuadFilter anti_aliasing_filter_;
  // High-pass filter reducing near-end noise (pass-through for factor 8).
  CascadedBiQuadFilter noise_reduction_filter_;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
|
33
VocieProcess/modules/audio_processing/aec3/delay_estimate.h
Normal file
33
VocieProcess/modules/audio_processing/aec3/delay_estimate.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores delay_estimates.
struct DelayEstimate {
  // Quality of the estimate.  NOTE(review): the enumerator names suggest
  // kCoarse is an initial/rough estimate and kRefined a refined one --
  // confirm against the delay estimator producing these values.
  enum class Quality { kCoarse, kRefined };

  DelayEstimate(Quality quality, size_t delay)
      : quality(quality), delay(delay) {}

  Quality quality;
  size_t delay;
  // Age counters, maintained by the code consuming the estimate.
  size_t blocks_since_last_change = 0;
  size_t blocks_since_last_update = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
|
||||
|
||||
#include <numeric>
|
||||
|
||||
namespace webrtc {
|
||||
// Copies the detection thresholds/durations from `config` and creates the
// per-channel counter arrays (value-initialized to zero).
DominantNearendDetector::DominantNearendDetector(
    const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
    size_t num_capture_channels)
    : enr_threshold_(config.enr_threshold),
      enr_exit_threshold_(config.enr_exit_threshold),
      snr_threshold_(config.snr_threshold),
      hold_duration_(config.hold_duration),
      trigger_threshold_(config.trigger_threshold),
      use_during_initial_phase_(config.use_during_initial_phase),
      num_capture_channels_(num_capture_channels),
      trigger_counters_(num_capture_channels_),
      hold_counters_(num_capture_channels_) {}
|
||||
|
||||
// Re-evaluates the per-channel nearend state machine from the latest
// spectral estimates; nearend_state_ becomes true if any channel is held in
// nearend mode after the update.
void DominantNearendDetector::Update(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        nearend_spectrum,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        residual_echo_spectrum,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        comfort_noise_spectrum,
    bool initial_state) {
  nearend_state_ = false;

  // Sums the energy in the low-frequency bins (1..15) of a spectrum.
  auto sum_low_bins = [](rtc::ArrayView<const float> spectrum) {
    RTC_DCHECK_LE(16, spectrum.size());
    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
  };

  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    const float nearend_energy = sum_low_bins(nearend_spectrum[ch]);
    const float echo_energy = sum_low_bins(residual_echo_spectrum[ch]);
    const float noise_energy = sum_low_bins(comfort_noise_spectrum[ch]);

    // Strong active nearend: the nearend is sufficiently stronger than both
    // the echo and the nearend noise (optionally also during the initial
    // phase).
    const bool strong_nearend =
        (!initial_state || use_during_initial_phase_) &&
        echo_energy < enr_threshold_ * nearend_energy &&
        nearend_energy > snr_threshold_ * noise_energy;

    if (strong_nearend) {
      if (++trigger_counters_[ch] >= trigger_threshold_) {
        // After a period of strong active nearend activity, flag nearend mode
        // and cap the trigger counter.
        hold_counters_[ch] = hold_duration_;
        trigger_counters_[ch] = trigger_threshold_;
      }
    } else {
      // Forget previously detected strong active nearend activity.
      trigger_counters_[ch] = std::max(0, trigger_counters_[ch] - 1);
    }

    // Exit nearend-state early at strong echo.
    if (echo_energy > enr_exit_threshold_ * nearend_energy &&
        echo_energy > snr_threshold_ * noise_energy) {
      hold_counters_[ch] = 0;
    }

    // Remain in any nearend mode for a certain duration.
    hold_counters_[ch] = std::max(0, hold_counters_[ch] - 1);
    if (hold_counters_[ch] > 0) {
      nearend_state_ = true;
    }
  }
}
|
||||
} // namespace webrtc
|
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/nearend_detector.h"
|
||||
|
||||
namespace webrtc {
|
||||
// Class for selecting whether the suppressor is in the nearend or echo state.
class DominantNearendDetector : public NearendDetector {
 public:
  DominantNearendDetector(
      const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
      size_t num_capture_channels);

  // Returns whether the current state is the nearend state.
  bool IsNearendState() const override { return nearend_state_; }

  // Updates the state selection based on latest spectral estimates.
  void Update(rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  nearend_spectrum,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  residual_echo_spectrum,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  comfort_noise_spectrum,
              bool initial_state) override;

 private:
  // Thresholds and durations copied from
  // EchoCanceller3Config::Suppressor::DominantNearendDetection.
  const float enr_threshold_;
  const float enr_exit_threshold_;
  const float snr_threshold_;
  const int hold_duration_;
  const int trigger_threshold_;
  const bool use_during_initial_phase_;
  const size_t num_capture_channels_;

  bool nearend_state_ = false;
  // Per-channel counts of consecutive strong-nearend detections.
  std::vector<int> trigger_counters_;
  // Per-channel countdowns keeping a channel in nearend mode.
  std::vector<int> hold_counters_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
|
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs the circular buffer with `downsampled_buffer_size`
// zero-initialized samples.
DownsampledRenderBuffer::DownsampledRenderBuffer(size_t downsampled_buffer_size)
    : size(static_cast<int>(downsampled_buffer_size)),
      buffer(downsampled_buffer_size, 0.f) {
  // The vector fill-constructor above already sets every element to 0.f, so
  // the redundant std::fill that used to follow has been removed.
}
||||
|
||||
// Out-of-line defaulted destructor.
DownsampledRenderBuffer::~DownsampledRenderBuffer() = default;
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Holds the circular buffer of the downsampled render data.
struct DownsampledRenderBuffer {
  explicit DownsampledRenderBuffer(size_t downsampled_buffer_size);
  ~DownsampledRenderBuffer();

  // Returns `index` advanced by one, wrapping at the end of the buffer.
  int IncIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index < size - 1 ? index + 1 : 0;
  }

  // Returns `index` moved back by one, wrapping at the start of the buffer.
  int DecIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index > 0 ? index - 1 : size - 1;
  }

  // Returns `index` shifted by `offset` modulo the buffer size; the
  // `size + index + offset` form keeps the result non-negative for offsets
  // down to -size.
  int OffsetIndex(int index, int offset) const {
    RTC_DCHECK_GE(buffer.size(), offset);
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return (size + index + offset) % size;
  }

  // Convenience mutators for the write and read positions.
  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
  void IncWriteIndex() { write = IncIndex(write); }
  void DecWriteIndex() { write = DecIndex(write); }
  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
  void IncReadIndex() { read = IncIndex(read); }
  void DecReadIndex() { read = DecIndex(read); }

  // Number of elements in `buffer` (kept equal to buffer.size()).
  const int size;
  // Downsampled render samples.
  std::vector<float> buffer;
  // Write position into `buffer`.
  int write = 0;
  // Read position into `buffer`.
  int read = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
|
119
VocieProcess/modules/audio_processing/aec3/echo_audibility.cc
Normal file
119
VocieProcess/modules/audio_processing/aec3/echo_audibility.cc
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/echo_audibility.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// `use_render_stationarity_at_init` allows stationarity-based residual echo
// scaling to apply even before the filter has had time to converge (see
// GetResidualEchoScaling in the header).
EchoAudibility::EchoAudibility(bool use_render_stationarity_at_init)
    : use_render_stationarity_at_init_(use_render_stationarity_at_init) {
  Reset();
}
|
||||
|
||||
// Out-of-line defaulted destructor.
EchoAudibility::~EchoAudibility() = default;
|
||||
|
||||
void EchoAudibility::Update(const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const float> average_reverb,
|
||||
int delay_blocks,
|
||||
bool external_delay_seen) {
|
||||
UpdateRenderNoiseEstimator(render_buffer.GetSpectrumBuffer(),
|
||||
render_buffer.GetBlockBuffer(),
|
||||
external_delay_seen);
|
||||
|
||||
if (external_delay_seen || use_render_stationarity_at_init_) {
|
||||
UpdateRenderStationarityFlags(render_buffer, average_reverb, delay_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
// Restores the initial state: no render activity seen, no previous write
// position latched, and a reset stationarity estimator.
void EchoAudibility::Reset() {
  non_zero_render_seen_ = false;
  render_spectrum_write_prev_ = absl::nullopt;
  render_stationarity_.Reset();
}
|
||||
|
||||
void EchoAudibility::UpdateRenderStationarityFlags(
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const float> average_reverb,
|
||||
int min_channel_delay_blocks) {
|
||||
const SpectrumBuffer& spectrum_buffer = render_buffer.GetSpectrumBuffer();
|
||||
int idx_at_delay = spectrum_buffer.OffsetIndex(spectrum_buffer.read,
|
||||
min_channel_delay_blocks);
|
||||
|
||||
int num_lookahead = render_buffer.Headroom() - min_channel_delay_blocks + 1;
|
||||
num_lookahead = std::max(0, num_lookahead);
|
||||
|
||||
render_stationarity_.UpdateStationarityFlags(spectrum_buffer, average_reverb,
|
||||
idx_at_delay, num_lookahead);
|
||||
}
|
||||
|
||||
// Feeds every spectrum written to the buffer since the previous call into the
// stationarity noise estimator, once non-trivial render activity (or an
// external delay) has been observed.
void EchoAudibility::UpdateRenderNoiseEstimator(
    const SpectrumBuffer& spectrum_buffer,
    const BlockBuffer& block_buffer,
    bool external_delay_seen) {
  // First call after Reset(): just latch the current write positions.
  if (!render_spectrum_write_prev_) {
    render_spectrum_write_prev_ = spectrum_buffer.write;
    render_block_write_prev_ = block_buffer.write;
    return;
  }
  int render_spectrum_write_current = spectrum_buffer.write;
  // Until an external delay is seen, gate the update on the render signal
  // being non-trivially large.
  if (!non_zero_render_seen_ && !external_delay_seen) {
    non_zero_render_seen_ = !IsRenderTooLow(block_buffer);
  }
  if (non_zero_render_seen_) {
    // Walk the circular buffer from the previous write position to the
    // current one, feeding each spectrum to the noise estimator.
    for (int idx = render_spectrum_write_prev_.value();
         idx != render_spectrum_write_current;
         idx = spectrum_buffer.DecIndex(idx)) {
      render_stationarity_.UpdateNoiseEstimator(spectrum_buffer.buffer[idx]);
    }
  }
  render_spectrum_write_prev_ = render_spectrum_write_current;
}
|
||||
|
||||
// Returns true if the render blocks written since the previous call contain
// only values close to zero (band 0, all channels); one too-low block
// discards the whole batch.  Also updates the latched write position.
bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) {
  const int num_render_channels =
      static_cast<int>(block_buffer.buffer[0].NumChannels());
  bool too_low = false;
  const int render_block_write_current = block_buffer.write;
  // No new blocks since the previous call counts as too low.
  if (render_block_write_current == render_block_write_prev_) {
    too_low = true;
  } else {
    for (int idx = render_block_write_prev_; idx != render_block_write_current;
         idx = block_buffer.IncIndex(idx)) {
      // Peak absolute amplitude of band 0 over all channels of this block.
      float max_abs_over_channels = 0.f;
      for (int ch = 0; ch < num_render_channels; ++ch) {
        rtc::ArrayView<const float, kBlockSize> block =
            block_buffer.buffer[idx].View(/*band=*/0, /*channel=*/ch);
        auto r = std::minmax_element(block.cbegin(), block.cend());
        float max_abs_channel =
            std::max(std::fabs(*r.first), std::fabs(*r.second));
        max_abs_over_channels =
            std::max(max_abs_over_channels, max_abs_channel);
      }
      if (max_abs_over_channels < 10.f) {
        too_low = true;  // Discards all blocks if one of them is too low.
        break;
      }
    }
  }
  render_block_write_prev_ = render_block_write_current;
  return too_low;
}
|
||||
|
||||
} // namespace webrtc
|
85
VocieProcess/modules/audio_processing/aec3/echo_audibility.h
Normal file
85
VocieProcess/modules/audio_processing/aec3/echo_audibility.h
Normal file
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Tracks render-signal stationarity and uses it to decide per-band residual
// echo scaling.
class EchoAudibility {
 public:
  // `use_render_stationarity_at_init` allows stationarity-based scaling even
  // before the filter has had time to converge.
  explicit EchoAudibility(bool use_render_stationarity_at_init);
  ~EchoAudibility();

  EchoAudibility(const EchoAudibility&) = delete;
  EchoAudibility& operator=(const EchoAudibility&) = delete;

  // Feed new render data to the echo audibility estimator.
  void Update(const RenderBuffer& render_buffer,
              rtc::ArrayView<const float> average_reverb,
              int min_channel_delay_blocks,
              bool external_delay_seen);
  // Get the residual echo scaling: 0 for bands whose render signal is
  // stationary (once scaling is allowed), 1 otherwise.
  void GetResidualEchoScaling(bool filter_has_had_time_to_converge,
                              rtc::ArrayView<float> residual_scaling) const {
    for (size_t band = 0; band < residual_scaling.size(); ++band) {
      if (render_stationarity_.IsBandStationary(band) &&
          (filter_has_had_time_to_converge ||
           use_render_stationarity_at_init_)) {
        residual_scaling[band] = 0.f;
      } else {
        residual_scaling[band] = 1.0f;
      }
    }
  }

  // Returns true if the current render block is estimated as stationary.
  bool IsBlockStationary() const {
    return render_stationarity_.IsBlockStationary();
  }

 private:
  // Reset the EchoAudibility class.
  void Reset();

  // Updates the render stationarity flags for the current frame.
  void UpdateRenderStationarityFlags(const RenderBuffer& render_buffer,
                                     rtc::ArrayView<const float> average_reverb,
                                     int delay_blocks);

  // Updates the noise estimator with the new render data since the previous
  // call to this method.
  void UpdateRenderNoiseEstimator(const SpectrumBuffer& spectrum_buffer,
                                  const BlockBuffer& block_buffer,
                                  bool external_delay_seen);

  // Returns a bool being true if the render signal contains just close to zero
  // values.
  bool IsRenderTooLow(const BlockBuffer& block_buffer);

  // Spectrum-buffer write position at the previous noise update; unset until
  // the first Update() after Reset().
  absl::optional<int> render_spectrum_write_prev_;
  // Block-buffer write position at the previous render-level check.
  int render_block_write_prev_;
  // Whether non-trivially-large render data has been observed yet.
  bool non_zero_render_seen_;
  const bool use_render_stationarity_at_init_;
  StationarityEstimator render_stationarity_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
991
VocieProcess/modules/audio_processing/aec3/echo_canceller3.cc
Normal file
991
VocieProcess/modules/audio_processing/aec3/echo_canceller3.cc
Normal file
@ -0,0 +1,991 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/echo_canceller3.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/high_pass_filter.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/experiments/field_trial_parser.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
enum class EchoCanceller3ApiCall { kCapture, kRender };
|
||||
|
||||
bool DetectSaturation(rtc::ArrayView<const float> y) {
|
||||
for (size_t k = 0; k < y.size(); ++k) {
|
||||
if (y[k] >= 32700.0f || y[k] <= -32700.0f) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Retrieves a value from a field trial if it is available. If no value is
|
||||
// present, the default value is returned. If the retrieved value is beyond the
|
||||
// specified limits, the default value is returned instead.
|
||||
void RetrieveFieldTrialValue(absl::string_view trial_name,
|
||||
float min,
|
||||
float max,
|
||||
float* value_to_update) {
|
||||
const std::string field_trial_str = field_trial::FindFullName(trial_name);
|
||||
|
||||
FieldTrialParameter<double> field_trial_param(/*key=*/"", *value_to_update);
|
||||
|
||||
ParseFieldTrial({&field_trial_param}, field_trial_str);
|
||||
float field_trial_value = static_cast<float>(field_trial_param.Get());
|
||||
|
||||
if (field_trial_value >= min && field_trial_value <= max &&
|
||||
field_trial_value != *value_to_update) {
|
||||
RTC_LOG(LS_INFO) << "Key " << trial_name
|
||||
<< " changing AEC3 parameter value from "
|
||||
<< *value_to_update << " to " << field_trial_value;
|
||||
*value_to_update = field_trial_value;
|
||||
}
|
||||
}
|
||||
|
||||
void RetrieveFieldTrialValue(absl::string_view trial_name,
|
||||
int min,
|
||||
int max,
|
||||
int* value_to_update) {
|
||||
const std::string field_trial_str = field_trial::FindFullName(trial_name);
|
||||
|
||||
FieldTrialParameter<int> field_trial_param(/*key=*/"", *value_to_update);
|
||||
|
||||
ParseFieldTrial({&field_trial_param}, field_trial_str);
|
||||
float field_trial_value = field_trial_param.Get();
|
||||
|
||||
if (field_trial_value >= min && field_trial_value <= max &&
|
||||
field_trial_value != *value_to_update) {
|
||||
RTC_LOG(LS_INFO) << "Key " << trial_name
|
||||
<< " changing AEC3 parameter value from "
|
||||
<< *value_to_update << " to " << field_trial_value;
|
||||
*value_to_update = field_trial_value;
|
||||
}
|
||||
}
|
||||
|
||||
void FillSubFrameView(
|
||||
AudioBuffer* frame,
|
||||
size_t sub_frame_index,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
|
||||
RTC_DCHECK_GE(1, sub_frame_index);
|
||||
RTC_DCHECK_LE(0, sub_frame_index);
|
||||
RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size());
|
||||
RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size());
|
||||
for (size_t band = 0; band < sub_frame_view->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) {
|
||||
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
|
||||
&frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FillSubFrameView(
|
||||
bool proper_downmix_needed,
|
||||
std::vector<std::vector<std::vector<float>>>* frame,
|
||||
size_t sub_frame_index,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
|
||||
RTC_DCHECK_GE(1, sub_frame_index);
|
||||
RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
|
||||
const size_t frame_num_channels = (*frame)[0].size();
|
||||
const size_t sub_frame_num_channels = (*sub_frame_view)[0].size();
|
||||
if (frame_num_channels > sub_frame_num_channels) {
|
||||
RTC_DCHECK_EQ(sub_frame_num_channels, 1u);
|
||||
if (proper_downmix_needed) {
|
||||
// When a proper downmix is needed (which is the case when proper stereo
|
||||
// is present in the echo reference signal but the echo canceller does the
|
||||
// processing in mono) downmix the echo reference by averaging the channel
|
||||
// content (otherwise downmixing is done by selecting channel 0).
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t ch = 1; ch < frame_num_channels; ++ch) {
|
||||
for (size_t k = 0; k < kSubFrameLength; ++k) {
|
||||
(*frame)[band][/*channel=*/0]
|
||||
[sub_frame_index * kSubFrameLength + k] +=
|
||||
(*frame)[band][ch][sub_frame_index * kSubFrameLength + k];
|
||||
}
|
||||
}
|
||||
const float one_by_num_channels = 1.0f / frame_num_channels;
|
||||
for (size_t k = 0; k < kSubFrameLength; ++k) {
|
||||
(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength +
|
||||
k] *= one_by_num_channels;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
(*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
} else {
|
||||
RTC_DCHECK_EQ(frame_num_channels, sub_frame_num_channels);
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
|
||||
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][channel][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessCaptureFrameContent(
|
||||
AudioBuffer* linear_output,
|
||||
AudioBuffer* capture,
|
||||
bool level_change,
|
||||
bool aec_reference_is_downmixed_stereo,
|
||||
bool saturated_microphone_signal,
|
||||
size_t sub_frame_index,
|
||||
FrameBlocker* capture_blocker,
|
||||
BlockFramer* linear_output_framer,
|
||||
BlockFramer* output_framer,
|
||||
BlockProcessor* block_processor,
|
||||
Block* linear_output_block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>*
|
||||
linear_output_sub_frame_view,
|
||||
Block* capture_block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* capture_sub_frame_view) {
|
||||
FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view);
|
||||
|
||||
if (linear_output) {
|
||||
RTC_DCHECK(linear_output_framer);
|
||||
RTC_DCHECK(linear_output_block);
|
||||
RTC_DCHECK(linear_output_sub_frame_view);
|
||||
FillSubFrameView(linear_output, sub_frame_index,
|
||||
linear_output_sub_frame_view);
|
||||
}
|
||||
|
||||
capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view,
|
||||
capture_block);
|
||||
block_processor->ProcessCapture(
|
||||
/*echo_path_gain_change=*/level_change ||
|
||||
aec_reference_is_downmixed_stereo,
|
||||
saturated_microphone_signal, linear_output_block, capture_block);
|
||||
output_framer->InsertBlockAndExtractSubFrame(*capture_block,
|
||||
capture_sub_frame_view);
|
||||
|
||||
if (linear_output) {
|
||||
RTC_DCHECK(linear_output_framer);
|
||||
linear_output_framer->InsertBlockAndExtractSubFrame(
|
||||
*linear_output_block, linear_output_sub_frame_view);
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessRemainingCaptureFrameContent(bool level_change,
|
||||
bool aec_reference_is_downmixed_stereo,
|
||||
bool saturated_microphone_signal,
|
||||
FrameBlocker* capture_blocker,
|
||||
BlockFramer* linear_output_framer,
|
||||
BlockFramer* output_framer,
|
||||
BlockProcessor* block_processor,
|
||||
Block* linear_output_block,
|
||||
Block* block) {
|
||||
if (!capture_blocker->IsBlockAvailable()) {
|
||||
return;
|
||||
}
|
||||
|
||||
capture_blocker->ExtractBlock(block);
|
||||
block_processor->ProcessCapture(
|
||||
/*echo_path_gain_change=*/level_change ||
|
||||
aec_reference_is_downmixed_stereo,
|
||||
saturated_microphone_signal, linear_output_block, block);
|
||||
output_framer->InsertBlock(*block);
|
||||
|
||||
if (linear_output_framer) {
|
||||
RTC_DCHECK(linear_output_block);
|
||||
linear_output_framer->InsertBlock(*linear_output_block);
|
||||
}
|
||||
}
|
||||
|
||||
void BufferRenderFrameContent(
|
||||
bool proper_downmix_needed,
|
||||
std::vector<std::vector<std::vector<float>>>* render_frame,
|
||||
size_t sub_frame_index,
|
||||
FrameBlocker* render_blocker,
|
||||
BlockProcessor* block_processor,
|
||||
Block* block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
|
||||
FillSubFrameView(proper_downmix_needed, render_frame, sub_frame_index,
|
||||
sub_frame_view);
|
||||
render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
|
||||
block_processor->BufferRender(*block);
|
||||
}
|
||||
|
||||
void BufferRemainingRenderFrameContent(FrameBlocker* render_blocker,
|
||||
BlockProcessor* block_processor,
|
||||
Block* block) {
|
||||
if (!render_blocker->IsBlockAvailable()) {
|
||||
return;
|
||||
}
|
||||
render_blocker->ExtractBlock(block);
|
||||
block_processor->BufferRender(*block);
|
||||
}
|
||||
|
||||
void CopyBufferIntoFrame(const AudioBuffer& buffer,
|
||||
size_t num_bands,
|
||||
size_t num_channels,
|
||||
std::vector<std::vector<std::vector<float>>>* frame) {
|
||||
RTC_DCHECK_EQ(num_bands, frame->size());
|
||||
RTC_DCHECK_EQ(num_channels, (*frame)[0].size());
|
||||
RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size());
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
for (size_t channel = 0; channel < num_channels; ++channel) {
|
||||
rtc::ArrayView<const float> buffer_view(
|
||||
&buffer.split_bands_const(channel)[band][0],
|
||||
AudioBuffer::kSplitBandSize);
|
||||
std::copy(buffer_view.begin(), buffer_view.end(),
|
||||
(*frame)[band][channel].begin());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// TODO(webrtc:5298): Move this to a separate file.
|
||||
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
EchoCanceller3Config adjusted_cfg = config;
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) {
|
||||
adjusted_cfg.multi_channel.detect_stereo_content = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
|
||||
adjusted_cfg.suppressor.high_bands_suppression
|
||||
.anti_howling_activation_threshold = 25.f;
|
||||
adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 0.01f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) {
|
||||
adjusted_cfg.filter.config_change_duration_blocks = 10;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 0.f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot1SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .1f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot2SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .2f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot3SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .3f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot6SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .6f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot9SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .9f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 1.2f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 1.6f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 2.0f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3HighPassFilterEchoReference")) {
|
||||
adjusted_cfg.filter.high_pass_filter_echo_reference = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
|
||||
adjusted_cfg.ep_strength.echo_can_saturate = false;
|
||||
}
|
||||
|
||||
const std::string use_nearend_reverb_len_tunings =
|
||||
field_trial::FindFullName("WebRTC-Aec3UseNearendReverbLen");
|
||||
FieldTrialParameter<double> nearend_reverb_default_len(
|
||||
"default_len", adjusted_cfg.ep_strength.default_len);
|
||||
FieldTrialParameter<double> nearend_reverb_nearend_len(
|
||||
"nearend_len", adjusted_cfg.ep_strength.nearend_len);
|
||||
|
||||
ParseFieldTrial({&nearend_reverb_default_len, &nearend_reverb_nearend_len},
|
||||
use_nearend_reverb_len_tunings);
|
||||
float default_len = static_cast<float>(nearend_reverb_default_len.Get());
|
||||
float nearend_len = static_cast<float>(nearend_reverb_nearend_len.Get());
|
||||
if (default_len > -1 && default_len < 1 && nearend_len > -1 &&
|
||||
nearend_len < 1) {
|
||||
adjusted_cfg.ep_strength.default_len =
|
||||
static_cast<float>(nearend_reverb_default_len.Get());
|
||||
adjusted_cfg.ep_strength.nearend_len =
|
||||
static_cast<float>(nearend_reverb_nearend_len.Get());
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
|
||||
adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsDisabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
|
||||
adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
|
||||
// Two blocks headroom.
|
||||
adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) {
|
||||
adjusted_cfg.erle.clamp_quality_estimate_to_zero = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) {
|
||||
adjusted_cfg.erle.clamp_quality_estimate_to_one = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) {
|
||||
adjusted_cfg.erle.onset_detection = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) {
|
||||
adjusted_cfg.delay.render_alignment_mixing.downmix = true;
|
||||
adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) {
|
||||
adjusted_cfg.delay.capture_alignment_mixing.downmix = true;
|
||||
adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) {
|
||||
adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
|
||||
true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-"
|
||||
"Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) {
|
||||
adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
|
||||
false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3VerySensitiveDominantNearendActivation")) {
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) {
|
||||
adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f;
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f;
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EnforceConservativeHfSuppression")) {
|
||||
adjusted_cfg.suppressor.conservative_hf_suppression = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
|
||||
adjusted_cfg.echo_audibility.use_stationarity_properties = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
|
||||
adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
|
||||
adjusted_cfg.render_levels.active_render_limit = 50.f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
|
||||
adjusted_cfg.render_levels.active_render_limit = 30.f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3NonlinearModeReverbKillSwitch")) {
|
||||
adjusted_cfg.echo_model.model_reverb_in_nonlinear_mode = false;
|
||||
}
|
||||
|
||||
// Field-trial based override for the whole suppressor tuning.
|
||||
const std::string suppressor_tuning_override_trial_name =
|
||||
field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");
|
||||
|
||||
FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
|
||||
"nearend_tuning_mask_lf_enr_transparent",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
|
||||
FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
|
||||
"nearend_tuning_mask_lf_enr_suppress",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
|
||||
FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
|
||||
"nearend_tuning_mask_hf_enr_transparent",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
|
||||
FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
|
||||
"nearend_tuning_mask_hf_enr_suppress",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
|
||||
FieldTrialParameter<double> nearend_tuning_max_inc_factor(
|
||||
"nearend_tuning_max_inc_factor",
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
|
||||
FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
|
||||
"nearend_tuning_max_dec_factor_lf",
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
|
||||
FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
|
||||
"normal_tuning_mask_lf_enr_transparent",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
|
||||
FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
|
||||
"normal_tuning_mask_lf_enr_suppress",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
|
||||
FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
|
||||
"normal_tuning_mask_hf_enr_transparent",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
|
||||
FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
|
||||
"normal_tuning_mask_hf_enr_suppress",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
|
||||
FieldTrialParameter<double> normal_tuning_max_inc_factor(
|
||||
"normal_tuning_max_inc_factor",
|
||||
adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
|
||||
FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
|
||||
"normal_tuning_max_dec_factor_lf",
|
||||
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
|
||||
FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
|
||||
"dominant_nearend_detection_enr_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
|
||||
FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
|
||||
"dominant_nearend_detection_enr_exit_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
|
||||
FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
|
||||
"dominant_nearend_detection_snr_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
|
||||
FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
|
||||
"dominant_nearend_detection_hold_duration",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
|
||||
FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
|
||||
"dominant_nearend_detection_trigger_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
|
||||
|
||||
ParseFieldTrial(
|
||||
{&nearend_tuning_mask_lf_enr_transparent,
|
||||
&nearend_tuning_mask_lf_enr_suppress,
|
||||
&nearend_tuning_mask_hf_enr_transparent,
|
||||
&nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
|
||||
&nearend_tuning_max_dec_factor_lf,
|
||||
&normal_tuning_mask_lf_enr_transparent,
|
||||
&normal_tuning_mask_lf_enr_suppress,
|
||||
&normal_tuning_mask_hf_enr_transparent,
|
||||
&normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
|
||||
&normal_tuning_max_dec_factor_lf,
|
||||
&dominant_nearend_detection_enr_threshold,
|
||||
&dominant_nearend_detection_enr_exit_threshold,
|
||||
&dominant_nearend_detection_snr_threshold,
|
||||
&dominant_nearend_detection_hold_duration,
|
||||
&dominant_nearend_detection_trigger_threshold},
|
||||
suppressor_tuning_override_trial_name);
|
||||
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
|
||||
static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
|
||||
static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
|
||||
static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
|
||||
static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
|
||||
static_cast<float>(nearend_tuning_max_inc_factor.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
|
||||
static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
|
||||
static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
|
||||
static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
|
||||
static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
|
||||
static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
|
||||
static_cast<float>(normal_tuning_max_inc_factor.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
|
||||
static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
|
||||
static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
|
||||
static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
|
||||
static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
|
||||
dominant_nearend_detection_hold_duration.Get();
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
|
||||
dominant_nearend_detection_trigger_threshold.Get();
|
||||
|
||||
// Field trial-based overrides of individual suppressor parameters.
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
|
||||
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
|
||||
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
|
||||
100.f,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
|
||||
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
|
||||
|
||||
// Field trial-based overrides of individual delay estimator parameters.
|
||||
RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f, 1.f,
|
||||
&adjusted_cfg.delay.delay_estimate_smoothing);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f,
|
||||
&adjusted_cfg.delay.delay_estimate_smoothing_delay_found);
|
||||
|
||||
int max_allowed_excess_render_blocks_override =
|
||||
adjusted_cfg.buffering.max_allowed_excess_render_blocks;
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3BufferingMaxAllowedExcessRenderBlocksOverride", 0, 20,
|
||||
&max_allowed_excess_render_blocks_override);
|
||||
adjusted_cfg.buffering.max_allowed_excess_render_blocks =
|
||||
max_allowed_excess_render_blocks_override;
|
||||
return adjusted_cfg;
|
||||
}
|
||||
|
||||
// Helper that runs on the render (far-end) thread: it copies each 10 ms
// band-split render frame, optionally high-pass filters the echo reference,
// and hands the frame to the capture thread via the supplied SwapQueue.
class EchoCanceller3::RenderWriter {
 public:
  RenderWriter(ApmDataDumper* data_dumper,
               const EchoCanceller3Config& config,
               SwapQueue<std::vector<std::vector<std::vector<float>>>,
                         Aec3RenderQueueItemVerifier>* render_transfer_queue,
               size_t num_bands,
               size_t num_channels);

  RenderWriter() = delete;
  RenderWriter(const RenderWriter&) = delete;
  RenderWriter& operator=(const RenderWriter&) = delete;

  ~RenderWriter();
  // Copies one render frame into the transfer queue. Called on the render
  // thread only.
  void Insert(const AudioBuffer& input);

 private:
  ApmDataDumper* data_dumper_;
  const size_t num_bands_;
  const size_t num_channels_;
  // Only created when config.filter.high_pass_filter_echo_reference is set.
  std::unique_ptr<HighPassFilter> high_pass_filter_;
  // Pre-allocated staging frame [band][channel][sample] so that Insert()
  // performs no allocations.
  std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
  SwapQueue<std::vector<std::vector<std::vector<float>>>,
            Aec3RenderQueueItemVerifier>* render_transfer_queue_;
};
|
||||
|
||||
EchoCanceller3::RenderWriter::RenderWriter(
    ApmDataDumper* data_dumper,
    const EchoCanceller3Config& config,
    SwapQueue<std::vector<std::vector<std::vector<float>>>,
              Aec3RenderQueueItemVerifier>* render_transfer_queue,
    size_t num_bands,
    size_t num_channels)
    : data_dumper_(data_dumper),
      num_bands_(num_bands),
      num_channels_(num_channels),
      // Pre-size the staging frame to [num_bands][num_channels][band size]
      // so Insert() never allocates.
      render_queue_input_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      render_transfer_queue_(render_transfer_queue) {
  RTC_DCHECK(data_dumper);
  // Each split band is processed at a 16 kHz rate, hence the fixed sample
  // rate for the optional echo-reference high-pass filter.
  if (config.filter.high_pass_filter_echo_reference) {
    high_pass_filter_ = std::make_unique<HighPassFilter>(16000, num_channels);
  }
}
|
||||
|
||||
// Out-of-line destructor: required here since the header only forward
// declares the members' types.
EchoCanceller3::RenderWriter::~RenderWriter() = default;
|
||||
|
||||
// Copies one band-split render frame into the transfer queue, optionally
// high-pass filtering the lowest band first. Render-thread only.
void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
  RTC_DCHECK_EQ(num_bands_, input.num_bands());
  RTC_DCHECK_EQ(num_channels_, input.num_channels());

  // TODO(bugs.webrtc.org/8759) Temporary work-around.
  if (num_bands_ != input.num_bands())
    return;

  data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize,
                        &input.split_bands_const(0)[0][0], 16000, 1);

  CopyBufferIntoFrame(input, num_bands_, num_channels_,
                      &render_queue_input_frame_);
  // Filter only band 0; the higher bands are left untouched.
  if (high_pass_filter_) {
    high_pass_filter_->Process(&render_queue_input_frame_[0]);
  }

  // A failed insert (full queue) is deliberately ignored: the frame is
  // dropped and the capture side copes with the resulting jitter.
  static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
}
|
||||
|
||||
// Counts created instances so each gets a unique id for data dumping.
std::atomic<int> EchoCanceller3::instance_count_(0);
|
||||
|
||||
// Constructs the echo canceller for the given sample rate and channel
// counts. `multichannel_config`, when provided, is used once proper stereo
// render content has been detected.
EchoCanceller3::EchoCanceller3(
    const EchoCanceller3Config& config,
    const absl::optional<EchoCanceller3Config>& multichannel_config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels)
    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
      config_(AdjustConfig(config)),
      sample_rate_hz_(sample_rate_hz),
      num_bands_(NumBandsForRate(sample_rate_hz_)),
      num_render_input_channels_(num_render_channels),
      num_capture_channels_(num_capture_channels),
      // config_ is declared before config_selector_ in the class, so it is
      // already initialized here; reuse it instead of running AdjustConfig
      // (and its field-trial parsing) a second time.
      config_selector_(config_,
                       multichannel_config,
                       num_render_input_channels_),
      multichannel_content_detector_(
          config_selector_.active_config().multi_channel.detect_stereo_content,
          num_render_input_channels_,
          config_selector_.active_config()
              .multi_channel.stereo_detection_threshold,
          config_selector_.active_config()
              .multi_channel.stereo_detection_timeout_threshold_seconds,
          config_selector_.active_config()
              .multi_channel.stereo_detection_hysteresis_seconds),
      output_framer_(num_bands_, num_capture_channels_),
      capture_blocker_(num_bands_, num_capture_channels_),
      render_transfer_queue_(
          kRenderTransferQueueSizeFrames,
          std::vector<std::vector<std::vector<float>>>(
              num_bands_,
              std::vector<std::vector<float>>(
                  num_render_input_channels_,
                  std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
          Aec3RenderQueueItemVerifier(num_bands_,
                                      num_render_input_channels_,
                                      AudioBuffer::kSplitBandSize)),
      render_queue_output_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_render_input_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      render_block_(num_bands_, num_render_input_channels_),
      capture_block_(num_bands_, num_capture_channels_),
      capture_sub_frame_view_(
          num_bands_,
          std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));

  // NOTE(review): the condition reads config_selector_.active_config() while
  // the delay value comes from config_ — both hold the adjusted mono config
  // at this point, but confirm they cannot diverge here.
  if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
    block_delay_buffer_.reset(new BlockDelayBuffer(
        num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
        config_.delay.fixed_capture_delay_samples));
  }

  render_writer_.reset(new RenderWriter(
      data_dumper_.get(), config_selector_.active_config(),
      &render_transfer_queue_, num_bands_, num_render_input_channels_));

  RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
  RTC_DCHECK_GE(kMaxNumBands, num_bands_);

  if (config_selector_.active_config().filter.export_linear_aec_output) {
    // Fix: these three statements were previously chained with the comma
    // operator; they are now separate, semicolon-terminated statements.
    linear_output_framer_.reset(
        new BlockFramer(/*num_bands=*/1, num_capture_channels_));
    linear_output_block_ =
        std::make_unique<Block>(/*num_bands=*/1, num_capture_channels_);
    linear_output_sub_frame_view_ =
        std::vector<std::vector<rtc::ArrayView<float>>>(
            1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
  }

  Initialize();

  RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
                   << " Hz, num render channels: " << num_render_input_channels_
                   << ", num capture channels: " << num_capture_channels_;
}
|
||||
|
||||
// Out-of-line destructor so unique_ptr members of forward-declared types can
// be destroyed here.
EchoCanceller3::~EchoCanceller3() = default;
|
||||
|
||||
// (Re-)initializes the state that depends on whether proper multichannel
// render content has been detected. Called at construction and whenever the
// stereo-content detection flips.
void EchoCanceller3::Initialize() {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);

  // Until proper stereo content is detected, the AEC runs on a single
  // (downmixed) render channel.
  num_render_channels_to_aec_ =
      multichannel_content_detector_.IsProperMultiChannelContentDetected()
          ? num_render_input_channels_
          : 1;

  config_selector_.Update(
      multichannel_content_detector_.IsProperMultiChannelContentDetected());

  render_block_.SetNumChannels(num_render_channels_to_aec_);

  render_blocker_.reset(
      new FrameBlocker(num_bands_, num_render_channels_to_aec_));

  // The block processor is recreated from scratch with the active config.
  block_processor_.reset(BlockProcessor::Create(
      config_selector_.active_config(), sample_rate_hz_,
      num_render_channels_to_aec_, num_capture_channels_));

  render_sub_frame_view_ = std::vector<std::vector<rtc::ArrayView<float>>>(
      num_bands_,
      std::vector<rtc::ArrayView<float>>(num_render_channels_to_aec_));
}
|
||||
|
||||
void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
|
||||
|
||||
RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_);
|
||||
data_dumper_->DumpRaw("aec3_call_order",
|
||||
static_cast<int>(EchoCanceller3ApiCall::kRender));
|
||||
|
||||
return render_writer_->Insert(render);
|
||||
}
|
||||
|
||||
void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
|
||||
capture.channels_const()[0], sample_rate_hz_, 1);
|
||||
saturated_microphone_signal_ = false;
|
||||
for (size_t channel = 0; channel < capture.num_channels(); ++channel) {
|
||||
saturated_microphone_signal_ |=
|
||||
DetectSaturation(rtc::ArrayView<const float>(
|
||||
capture.channels_const()[channel], capture.num_frames()));
|
||||
if (saturated_microphone_signal_) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload that discards the linear AEC output.
void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
  ProcessCapture(capture, /*linear_output=*/nullptr, level_change);
}
|
||||
|
||||
// Removes the echo from one 10 ms split-band capture frame in place and, if
// requested, exports the linear filter output into `linear_output`.
// `level_change` signals an analog level change since the previous frame.
void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
                                    AudioBuffer* linear_output,
                                    bool level_change) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  RTC_DCHECK(capture);
  RTC_DCHECK_EQ(num_bands_, capture->num_bands());
  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
  RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
  data_dumper_->DumpRaw("aec3_call_order",
                        static_cast<int>(EchoCanceller3ApiCall::kCapture));

  // Requesting linear output requires export_linear_aec_output in the config.
  if (linear_output && !linear_output_framer_) {
    RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
                         "properly configuring AEC3.";
    RTC_DCHECK_NOTREACHED();
  }

  // Report capture call in the metrics and periodically update API call
  // metrics.
  api_call_metrics_.ReportCaptureCall();

  // Optionally delay the capture signal.
  if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
    RTC_DCHECK(block_delay_buffer_);
    block_delay_buffer_->DelaySignal(capture);
  }

  rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
      &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);

  data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);

  // Pull all queued render frames into the AEC before processing capture.
  EmptyRenderQueue();

  // A 10 ms frame holds two 5 ms sub-frames; process sub-frame 0, then 1,
  // then whatever remains in the blocker.
  ProcessCaptureFrameContent(
      linear_output, capture, level_change,
      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
      saturated_microphone_signal_, 0, &capture_blocker_,
      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
      linear_output_block_.get(), &linear_output_sub_frame_view_,
      &capture_block_, &capture_sub_frame_view_);

  ProcessCaptureFrameContent(
      linear_output, capture, level_change,
      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
      saturated_microphone_signal_, 1, &capture_blocker_,
      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
      linear_output_block_.get(), &linear_output_sub_frame_view_,
      &capture_block_, &capture_sub_frame_view_);

  ProcessRemainingCaptureFrameContent(
      level_change,
      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
      saturated_microphone_signal_, &capture_blocker_,
      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
      linear_output_block_.get(), &capture_block_);

  data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
                        &capture->split_bands(0)[0][0], 16000, 1);
}
|
||||
|
||||
// Fetches the current echo-canceller metrics from the block processor.
EchoControl::Metrics EchoCanceller3::GetMetrics() const {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  Metrics result;
  block_processor_->GetMetrics(&result);
  return result;
}
|
||||
|
||||
// Forwards an externally provided audio-buffer delay estimate to the block
// processor.
void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  block_processor_->SetAudioBufferDelay(delay_ms);
}
|
||||
|
||||
// Tells the block processor whether the capture output is actually consumed,
// allowing it to skip work when the output is discarded (e.g. muted).
void EchoCanceller3::SetCaptureOutputUsage(bool capture_output_used) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  block_processor_->SetCaptureOutputUsage(capture_output_used);
}
|
||||
|
||||
// AEC3 always processes; there is no pass-through mode.
bool EchoCanceller3::ActiveProcessing() const {
  return true;
}
|
||||
|
||||
// Returns a config tuned for multichannel operation, derived from the
// default config.
EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() {
  EchoCanceller3Config cfg;
  // Use shorter and more rapidly adapting coarse filter to compensate for
  // the increased number of total filter parameters to adapt.
  cfg.filter.coarse.length_blocks = 11;
  cfg.filter.coarse.rate = 0.95f;
  cfg.filter.coarse_initial.length_blocks = 11;
  cfg.filter.coarse_initial.rate = 0.95f;

  // Use more conservative suppressor behavior for non-nearend speech.
  cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
  cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
  return cfg;
}
|
||||
|
||||
// Test-only hook: replaces the internal block processor with a mock/fake.
void EchoCanceller3::SetBlockProcessorForTesting(
    std::unique_ptr<BlockProcessor> block_processor) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  RTC_DCHECK(block_processor);
  block_processor_ = std::move(block_processor);
}
|
||||
|
||||
// Drains the render SwapQueue on the capture thread, feeding every queued
// render frame to the block processor. Also runs the stereo-content
// detection, reinitializing the AEC if the detection result changes.
void EchoCanceller3::EmptyRenderQueue() {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  bool frame_to_buffer =
      render_transfer_queue_.Remove(&render_queue_output_frame_);
  while (frame_to_buffer) {
    // Report render call in the metrics.
    api_call_metrics_.ReportRenderCall();

    if (multichannel_content_detector_.UpdateDetection(
            render_queue_output_frame_)) {
      // Reinitialize the AEC when proper stereo is detected.
      Initialize();
    }

    // Buffer frame content: the two 5 ms sub-frames, then the remainder held
    // by the blocker.
    BufferRenderFrameContent(
        /*proper_downmix_needed=*/multichannel_content_detector_
            .IsTemporaryMultiChannelContentDetected(),
        &render_queue_output_frame_, 0, render_blocker_.get(),
        block_processor_.get(), &render_block_, &render_sub_frame_view_);

    BufferRenderFrameContent(
        /*proper_downmix_needed=*/multichannel_content_detector_
            .IsTemporaryMultiChannelContentDetected(),
        &render_queue_output_frame_, 1, render_blocker_.get(),
        block_processor_.get(), &render_block_, &render_sub_frame_view_);

    BufferRemainingRenderFrameContent(render_blocker_.get(),
                                      block_processor_.get(), &render_block_);

    frame_to_buffer =
        render_transfer_queue_.Remove(&render_queue_output_frame_);
  }
}
|
||||
} // namespace webrtc
|
230
VocieProcess/modules/audio_processing/aec3/echo_canceller3.h
Normal file
230
VocieProcess/modules/audio_processing/aec3/echo_canceller3.h
Normal file
@ -0,0 +1,230 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
|
||||
#include "modules/audio_processing/aec3/block_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/block_framer.h"
|
||||
#include "modules/audio_processing/aec3/block_processor.h"
|
||||
#include "modules/audio_processing/aec3/config_selector.h"
|
||||
#include "modules/audio_processing/aec3/frame_blocker.h"
|
||||
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/race_checker.h"
|
||||
#include "rtc_base/swap_queue.h"
|
||||
#include "rtc_base/thread_annotations.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Method for adjusting config parameter dependencies.
|
||||
// Only to be used externally to AEC3 for testing purposes.
|
||||
// TODO(webrtc:5298): Move this to a separate file.
|
||||
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config);
|
||||
|
||||
// Functor for verifying the invariance of the frames being put into the
// render queue: an item is valid iff it has exactly
// [num_bands][num_channels][frame_length] samples.
class Aec3RenderQueueItemVerifier {
 public:
  Aec3RenderQueueItemVerifier(size_t num_bands,
                              size_t num_channels,
                              size_t frame_length)
      : num_bands_(num_bands),
        num_channels_(num_channels),
        frame_length_(frame_length) {}

  // Returns true iff `v` has the expected shape.
  bool operator()(const std::vector<std::vector<std::vector<float>>>& v) const {
    if (v.size() != num_bands_) {
      return false;
    }
    for (size_t band = 0; band < v.size(); ++band) {
      if (v[band].size() != num_channels_) {
        return false;
      }
      for (size_t channel = 0; channel < v[band].size(); ++channel) {
        if (v[band][channel].size() != frame_length_) {
          return false;
        }
      }
    }
    return true;
  }

 private:
  const size_t num_bands_;
  const size_t num_channels_;
  const size_t frame_length_;
};
|
||||
|
||||
// Main class for the echo canceller3.
// It does the following:
// -Receives 10 ms frames of band-split audio.
// -Provides the lower level echo canceller functionality with
//  blocks of 64 samples of audio data.
// -Partially handles the jitter in the render and capture API
//  call sequence.
//
// The class is supposed to be used in a non-concurrent manner apart from the
// AnalyzeRender call which can be called concurrently with the other methods.
class EchoCanceller3 : public EchoControl {
 public:
  EchoCanceller3(
      const EchoCanceller3Config& config,
      const absl::optional<EchoCanceller3Config>& multichannel_config,
      int sample_rate_hz,
      size_t num_render_channels,
      size_t num_capture_channels);

  ~EchoCanceller3() override;

  EchoCanceller3(const EchoCanceller3&) = delete;
  EchoCanceller3& operator=(const EchoCanceller3&) = delete;

  // Analyzes and stores an internal copy of the split-band domain render
  // signal.
  void AnalyzeRender(AudioBuffer* render) override { AnalyzeRender(*render); }
  // Analyzes the full-band domain capture signal to detect signal saturation.
  void AnalyzeCapture(AudioBuffer* capture) override {
    AnalyzeCapture(*capture);
  }
  // Processes the split-band domain capture signal in order to remove any echo
  // present in the signal.
  void ProcessCapture(AudioBuffer* capture, bool level_change) override;
  // As above, but also returns the linear filter output.
  void ProcessCapture(AudioBuffer* capture,
                      AudioBuffer* linear_output,
                      bool level_change) override;
  // Collect current metrics from the echo canceller.
  Metrics GetMetrics() const override;
  // Provides an optional external estimate of the audio buffer delay.
  void SetAudioBufferDelay(int delay_ms) override;

  // Specifies whether the capture output will be used. The purpose of this is
  // to allow the echo controller to deactivate some of the processing when the
  // resulting output is anyway not used, for instance when the endpoint is
  // muted.
  void SetCaptureOutputUsage(bool capture_output_used) override;

  bool ActiveProcessing() const override;

  // Signals whether an external detector has detected echo leakage from the
  // echo canceller.
  // Note that in the case echo leakage has been flagged, it should be unflagged
  // once it is no longer occurring.
  void UpdateEchoLeakageStatus(bool leakage_detected) {
    RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
    block_processor_->UpdateEchoLeakageStatus(leakage_detected);
  }

  // Produces a default configuration for multichannel.
  static EchoCanceller3Config CreateDefaultMultichannelConfig();

 private:
  friend class EchoCanceller3Tester;
  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
                           DetectionOfProperStereoUsingThreshold);
  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
                           DetectionOfProperStereoUsingHysteresis);
  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
                           StereoContentDetectionForMonoSignals);

  class RenderWriter;

  // (Re-)Initializes the selected subset of the EchoCanceller3 fields, at
  // creation as well as during reconfiguration.
  void Initialize();

  // Only for testing. Replaces the internal block processor.
  void SetBlockProcessorForTesting(
      std::unique_ptr<BlockProcessor> block_processor);

  // Only for testing. Returns whether stereo processing is active.
  bool StereoRenderProcessingActiveForTesting() const {
    return multichannel_content_detector_.IsProperMultiChannelContentDetected();
  }

  // Only for testing.
  const EchoCanceller3Config& GetActiveConfigForTesting() const {
    return config_selector_.active_config();
  }

  // Empties the render SwapQueue.
  void EmptyRenderQueue();

  // Analyzes and stores an internal copy of the split-band domain render
  // signal.
  void AnalyzeRender(const AudioBuffer& render);
  // Analyzes the full-band domain capture signal to detect signal saturation.
  void AnalyzeCapture(const AudioBuffer& capture);

  rtc::RaceChecker capture_race_checker_;
  rtc::RaceChecker render_race_checker_;

  // State that is accessed by the AnalyzeRender call.
  std::unique_ptr<RenderWriter> render_writer_
      RTC_GUARDED_BY(render_race_checker_);

  // State that may be accessed by the capture thread.
  static std::atomic<int> instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  const int sample_rate_hz_;
  const int num_bands_;
  const size_t num_render_input_channels_;
  // Number of render channels actually fed to the AEC (1 until proper stereo
  // content is detected).
  size_t num_render_channels_to_aec_;
  const size_t num_capture_channels_;
  ConfigSelector config_selector_;
  MultiChannelContentDetector multichannel_content_detector_;
  std::unique_ptr<BlockFramer> linear_output_framer_
      RTC_GUARDED_BY(capture_race_checker_);
  BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
  FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
  std::unique_ptr<FrameBlocker> render_blocker_
      RTC_GUARDED_BY(capture_race_checker_);
  SwapQueue<std::vector<std::vector<std::vector<float>>>,
            Aec3RenderQueueItemVerifier>
      render_transfer_queue_;
  std::unique_ptr<BlockProcessor> block_processor_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<std::vector<float>>> render_queue_output_frame_
      RTC_GUARDED_BY(capture_race_checker_);
  bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) =
      false;
  Block render_block_ RTC_GUARDED_BY(capture_race_checker_);
  std::unique_ptr<Block> linear_output_block_
      RTC_GUARDED_BY(capture_race_checker_);
  Block capture_block_ RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> render_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> linear_output_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> capture_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::unique_ptr<BlockDelayBuffer> block_delay_buffer_
      RTC_GUARDED_BY(capture_race_checker_);
  ApiCallJitterMetrics api_call_metrics_ RTC_GUARDED_BY(capture_race_checker_);
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
|
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
EchoPathDelayEstimator::EchoPathDelayEstimator(
    ApmDataDumper* data_dumper,
    const EchoCanceller3Config& config,
    size_t num_capture_channels)
    : data_dumper_(data_dumper),
      down_sampling_factor_(config.delay.down_sampling_factor),
      // Guard against a zero factor to avoid division by zero; the DCHECK
      // below still flags such a config.
      sub_block_size_(down_sampling_factor_ != 0
                          ? kBlockSize / down_sampling_factor_
                          : kBlockSize),
      capture_mixer_(num_capture_channels,
                     config.delay.capture_alignment_mixing),
      capture_decimator_(down_sampling_factor_),
      matched_filter_(
          data_dumper_,
          DetectOptimization(),
          sub_block_size_,
          kMatchedFilterWindowSizeSubBlocks,
          config.delay.num_filters,
          kMatchedFilterAlignmentShiftSizeSubBlocks,
          // A more heavily downsampled render signal needs a different
          // poor-excitation limit.
          config.delay.down_sampling_factor == 8
              ? config.render_levels.poor_excitation_render_limit_ds8
              : config.render_levels.poor_excitation_render_limit,
          config.delay.delay_estimate_smoothing,
          config.delay.delay_estimate_smoothing_delay_found,
          config.delay.delay_candidate_detection_threshold,
          config.delay.detect_pre_echo),
      matched_filter_lag_aggregator_(data_dumper_,
                                     matched_filter_.GetMaxFilterLag(),
                                     config.delay) {
  RTC_DCHECK(data_dumper);
  RTC_DCHECK(down_sampling_factor_ > 0);
}
|
||||
|
||||
// Out-of-line destructor matching the out-of-line constructor.
EchoPathDelayEstimator::~EchoPathDelayEstimator() = default;
|
||||
|
||||
// Public reset: always resets the lag aggregator; optionally also resets the
// delay confidence (as if the call restarted).
void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
  Reset(true, reset_delay_confidence);
}
|
||||
|
||||
// Produces a delay estimate (in samples at the full band rate) for the
// current capture block, or nullopt if no reliable estimate is available.
absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
    const DownsampledRenderBuffer& render_buffer,
    const Block& capture) {
  // Working buffers sized for the undecimated block; only the first
  // sub_block_size_ samples of the decimated view are used.
  std::array<float, kBlockSize> downsampled_capture_data;
  rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
                                            sub_block_size_);

  // Downmix the capture channels, then decimate to the matched-filter rate.
  std::array<float, kBlockSize> downmixed_capture;
  capture_mixer_.ProduceOutput(capture, downmixed_capture);
  capture_decimator_.Decimate(downmixed_capture, downsampled_capture);
  data_dumper_->DumpWav("aec3_capture_decimator_output",
                        downsampled_capture.size(), downsampled_capture.data(),
                        16000 / down_sampling_factor_, 1);
  matched_filter_.Update(render_buffer, downsampled_capture,
                         matched_filter_lag_aggregator_.ReliableDelayFound());

  absl::optional<DelayEstimate> aggregated_matched_filter_lag =
      matched_filter_lag_aggregator_.Aggregate(
          matched_filter_.GetBestLagEstimate());

  // Run clockdrift detection, but only on refined (high-quality) estimates.
  if (aggregated_matched_filter_lag &&
      (*aggregated_matched_filter_lag).quality ==
          DelayEstimate::Quality::kRefined)
    clockdrift_detector_.Update(
        matched_filter_lag_aggregator_.GetDelayAtHighestPeak());

  // TODO(peah): Move this logging outside of this class once EchoCanceller3
  // development is done.
  data_dumper_->DumpRaw(
      "aec3_echo_path_delay_estimator_delay",
      aggregated_matched_filter_lag
          ? static_cast<int>(aggregated_matched_filter_lag->delay *
                             down_sampling_factor_)
          : -1);

  // Return the detected delay in samples as the aggregated matched filter lag
  // compensated by the down sampling factor for the signal being correlated.
  if (aggregated_matched_filter_lag) {
    aggregated_matched_filter_lag->delay *= down_sampling_factor_;
  }

  // Track how long the estimate has stayed constant.
  if (old_aggregated_lag_ && aggregated_matched_filter_lag &&
      old_aggregated_lag_->delay == aggregated_matched_filter_lag->delay) {
    ++consistent_estimate_counter_;
  } else {
    consistent_estimate_counter_ = 0;
  }
  old_aggregated_lag_ = aggregated_matched_filter_lag;
  // After half a second of identical estimates, do a partial reset (without
  // resetting the lag aggregator or delay confidence).
  constexpr size_t kNumBlocksPerSecondBy2 = kNumBlocksPerSecond / 2;
  if (consistent_estimate_counter_ > kNumBlocksPerSecondBy2) {
    Reset(false, false);
  }

  return aggregated_matched_filter_lag;
}
|
||||
|
||||
// Internal reset with finer granularity: optionally resets the lag
// aggregator (and its delay confidence); always clears the consistency
// tracking state.
void EchoPathDelayEstimator::Reset(bool reset_lag_aggregator,
                                   bool reset_delay_confidence) {
  if (reset_lag_aggregator) {
    matched_filter_lag_aggregator_.Reset(reset_delay_confidence);
  }
  matched_filter_.Reset(/*full_reset=*/reset_lag_aggregator);
  old_aggregated_lag_ = absl::nullopt;
  consistent_estimate_counter_ = 0;
}
|
||||
} // namespace webrtc
|
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/alignment_mixer.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/clockdrift_detector.h"
|
||||
#include "modules/audio_processing/aec3/decimator.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
struct DownsampledRenderBuffer;
|
||||
struct EchoCanceller3Config;
|
||||
|
||||
// Estimates the delay of the echo path.
class EchoPathDelayEstimator {
 public:
  EchoPathDelayEstimator(ApmDataDumper* data_dumper,
                         const EchoCanceller3Config& config,
                         size_t num_capture_channels);
  ~EchoPathDelayEstimator();

  EchoPathDelayEstimator(const EchoPathDelayEstimator&) = delete;
  EchoPathDelayEstimator& operator=(const EchoPathDelayEstimator&) = delete;

  // Resets the estimation. If the delay confidence is reset, the reset behavior
  // is as if the call is restarted.
  void Reset(bool reset_delay_confidence);

  // Produce a delay estimate if such is avaliable.
  absl::optional<DelayEstimate> EstimateDelay(
      const DownsampledRenderBuffer& render_buffer,
      const Block& capture);

  // Log delay estimator properties.
  void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
    matched_filter_.LogFilterProperties(sample_rate_hz, shift,
                                        down_sampling_factor_);
  }

  // Returns the level of detected clockdrift.
  ClockdriftDetector::Level Clockdrift() const {
    return clockdrift_detector_.ClockdriftLevel();
  }

 private:
  ApmDataDumper* const data_dumper_;
  const size_t down_sampling_factor_;
  // Samples per matched-filter sub-block (kBlockSize / down_sampling_factor_).
  const size_t sub_block_size_;
  AlignmentMixer capture_mixer_;
  Decimator capture_decimator_;
  MatchedFilter matched_filter_;
  MatchedFilterLagAggregator matched_filter_lag_aggregator_;
  // Previous aggregated estimate, used to detect a stuck estimate.
  absl::optional<DelayEstimate> old_aggregated_lag_;
  size_t consistent_estimate_counter_ = 0;
  ClockdriftDetector clockdrift_detector_;

  // Internal reset method with more granularity.
  void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
|
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"

namespace webrtc {

// Trivial constructor for the plain-data struct: stores the flags that
// describe how the echo path changed for the current capture block.
EchoPathVariability::EchoPathVariability(bool gain_change,
                                         DelayAdjustment delay_change,
                                         bool clock_drift)
    : gain_change(gain_change),
      delay_change(delay_change),
      clock_drift(clock_drift) {}

}  // namespace webrtc
|
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_

namespace webrtc {

// Describes how the echo path changed for the current capture block: whether
// the gain changed, what kind of delay adjustment took place, and whether
// clock drift was flagged.
struct EchoPathVariability {
  enum class DelayAdjustment { kNone, kBufferFlush, kNewDetectedDelay };

  EchoPathVariability(bool gain_change,
                      DelayAdjustment delay_change,
                      bool clock_drift);

  // True when the audio path changed in any way, i.e., either a gain change
  // occurred or some form of delay adjustment was made.
  bool AudioPathChanged() const {
    const bool delay_adjusted = delay_change != DelayAdjustment::kNone;
    return delay_adjusted || gain_change;
  }
  bool gain_change;
  DelayAdjustment delay_change;
  bool clock_drift;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
|
521
VocieProcess/modules/audio_processing/aec3/echo_remover.cc
Normal file
521
VocieProcess/modules/audio_processing/aec3/echo_remover.cc
Normal file
@ -0,0 +1,521 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/echo_remover.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/residual_echo_estimator.h"
|
||||
#include "modules/audio_processing/aec3/subtractor.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output.h"
|
||||
#include "modules/audio_processing/aec3/suppression_filter.h"
|
||||
#include "modules/audio_processing/aec3/suppression_gain.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Maximum number of channels for which the capture channel data is stored on
// the stack. When there are more channels than this, the data is instead kept
// in scratch memory pre-allocated on the heap. This split avoids wasting heap
// space for the common low channel counts while still supporting arbitrarily
// many channels without enlarging the fixed stack arrays.
constexpr size_t kMaxNumChannelsOnStack = 2;

// Returns the number of channels that must be backed by heap storage: zero
// when all channels fit in the stack-allocated arrays, otherwise the full
// capture channel count.
size_t NumChannelsOnHeap(size_t num_capture_channels) {
  if (num_capture_channels <= kMaxNumChannelsOnStack) {
    return 0;
  }
  return num_capture_channels;
}
|
||||
|
||||
// Computes the per-bin power spectrum of the linear echo estimate S = Y - E,
// i.e. the squared magnitude of the difference between the capture spectrum Y
// and the linear-filter error spectrum E, written into *S2.
void LinearEchoPower(const FftData& E,
                     const FftData& Y,
                     std::array<float, kFftLengthBy2Plus1>* S2) {
  for (size_t k = 0; k < E.re.size(); ++k) {
    (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) +
               (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]);
  }
}
|
||||
|
||||
// Fades between two input signals using a fix-sized transition: the first
// kTransitionSize samples of `out` are a linear crossfade from `from` to `to`,
// the remainder is copied straight from `to`.
void SignalTransition(rtc::ArrayView<const float> from,
                      rtc::ArrayView<const float> to,
                      rtc::ArrayView<float> out) {
  // NOTE(review): this presumably relies on ArrayView equality comparing the
  // views themselves (data pointer and size) rather than element contents, so
  // this branch is a fast path when both views alias the same buffer — confirm.
  if (from == to) {
    RTC_DCHECK_EQ(to.size(), out.size());
    std::copy(to.begin(), to.end(), out.begin());
  } else {
    constexpr size_t kTransitionSize = 30;
    constexpr float kOneByTransitionSizePlusOne = 1.f / (kTransitionSize + 1);

    RTC_DCHECK_EQ(from.size(), to.size());
    RTC_DCHECK_EQ(from.size(), out.size());
    RTC_DCHECK_LE(kTransitionSize, out.size());

    // Crossfade region: weight on `to` grows linearly from ~0 to ~1.
    for (size_t k = 0; k < kTransitionSize; ++k) {
      float a = (k + 1) * kOneByTransitionSizePlusOne;
      out[k] = a * to[k] + (1.f - a) * from[k];
    }

    // Remainder is taken verbatim from `to`.
    std::copy(to.begin() + kTransitionSize, to.end(),
              out.begin() + kTransitionSize);
  }
}
|
||||
|
||||
// Computes a windowed (square root Hanning) padded FFT of `v` (with `v_old`
// providing the previous block for the overlap), then updates the related
// memory by saving `v` into `v_old` for the next call.
void WindowedPaddedFft(const Aec3Fft& fft,
                       rtc::ArrayView<const float> v,
                       rtc::ArrayView<float> v_old,
                       FftData* V) {
  fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V);
  std::copy(v.begin(), v.end(), v_old.begin());
}
|
||||
|
||||
// Class for removing the echo from the capture signal. Combines a linear
// subtractor stage with a spectral suppressor and comfort-noise injection.
class EchoRemoverImpl final : public EchoRemover {
 public:
  EchoRemoverImpl(const EchoCanceller3Config& config,
                  int sample_rate_hz,
                  size_t num_render_channels,
                  size_t num_capture_channels);
  ~EchoRemoverImpl() override;
  EchoRemoverImpl(const EchoRemoverImpl&) = delete;
  EchoRemoverImpl& operator=(const EchoRemoverImpl&) = delete;

  void GetMetrics(EchoControl::Metrics* metrics) const override;

  // Removes the echo from a block of samples from the capture signal. The
  // supplied render signal is assumed to be pre-aligned with the capture
  // signal.
  void ProcessCapture(EchoPathVariability echo_path_variability,
                      bool capture_signal_saturation,
                      const absl::optional<DelayEstimate>& external_delay,
                      RenderBuffer* render_buffer,
                      Block* linear_output,
                      Block* capture) override;

  // Updates the status on whether echo leakage is detected in the output of the
  // echo remover.
  void UpdateEchoLeakageStatus(bool leakage_detected) override {
    echo_leakage_detected_ = leakage_detected;
  }

  // When false, parts of the processing are skipped (see ProcessCapture).
  void SetCaptureOutputUsage(bool capture_output_used) override {
    capture_output_used_ = capture_output_used;
  }

 private:
  // Selects which of the coarse and refined linear filter outputs that is most
  // appropriate to pass to the suppressor and forms the linear filter output by
  // smoothly transition between those.
  void FormLinearFilterOutput(const SubtractorOutput& subtractor_output,
                              rtc::ArrayView<float> output);

  // Used to give each instance a unique id for the data dumper.
  static std::atomic<int> instance_count_;
  const EchoCanceller3Config config_;
  const Aec3Fft fft_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const Aec3Optimization optimization_;
  const int sample_rate_hz_;
  const size_t num_render_channels_;
  const size_t num_capture_channels_;
  const bool use_coarse_filter_output_;
  Subtractor subtractor_;
  SuppressionGain suppression_gain_;
  ComfortNoiseGenerator cng_;
  SuppressionFilter suppression_filter_;
  RenderSignalAnalyzer render_signal_analyzer_;
  ResidualEchoEstimator residual_echo_estimator_;
  bool echo_leakage_detected_ = false;
  bool capture_output_used_ = true;
  AecState aec_state_;
  EchoRemoverMetrics metrics_;
  // Per-channel memory of the previous block, used for the overlap in the
  // windowed FFTs.
  std::vector<std::array<float, kFftLengthBy2>> e_old_;
  std::vector<std::array<float, kFftLengthBy2>> y_old_;
  size_t block_counter_ = 0;
  int gain_change_hangover_ = 0;
  bool refined_filter_output_last_selected_ = true;

  // Pre-allocated heap scratch used when the capture channel count exceeds
  // the fixed stack capacity (see kMaxNumChannelsOnStack).
  std::vector<std::array<float, kFftLengthBy2>> e_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
  std::vector<FftData> Y_heap_;
  std::vector<FftData> E_heap_;
  std::vector<FftData> comfort_noise_heap_;
  std::vector<FftData> high_band_comfort_noise_heap_;
  std::vector<SubtractorOutput> subtractor_output_heap_;
};
|
||||
|
||||
std::atomic<int> EchoRemoverImpl::instance_count_(0);

// Constructs the echo remover and all of its sub-components. The heap scratch
// vectors are sized by NumChannelsOnHeap(), i.e. they stay empty for channel
// counts that fit on the stack.
EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
                                 int sample_rate_hz,
                                 size_t num_render_channels,
                                 size_t num_capture_channels)
    : config_(config),
      fft_(),
      data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
      optimization_(DetectOptimization()),
      sample_rate_hz_(sample_rate_hz),
      num_render_channels_(num_render_channels),
      num_capture_channels_(num_capture_channels),
      use_coarse_filter_output_(
          config_.filter.enable_coarse_filter_output_usage),
      subtractor_(config,
                  num_render_channels_,
                  num_capture_channels_,
                  data_dumper_.get(),
                  optimization_),
      suppression_gain_(config_,
                        optimization_,
                        sample_rate_hz,
                        num_capture_channels),
      cng_(config_, optimization_, num_capture_channels_),
      suppression_filter_(optimization_,
                          sample_rate_hz_,
                          num_capture_channels_),
      render_signal_analyzer_(config_),
      residual_echo_estimator_(config_, num_render_channels),
      aec_state_(config_, num_capture_channels_),
      e_old_(num_capture_channels_, {0.f}),
      y_old_(num_capture_channels_, {0.f}),
      e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
      Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)),
      S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
      Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
      E_heap_(NumChannelsOnHeap(num_capture_channels_)),
      comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
      high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
      subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
}

EchoRemoverImpl::~EchoRemoverImpl() = default;
|
||||
|
||||
// Fills in the externally reported echo-control metrics from the AEC state.
void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const {
  // Echo return loss (ERL) is inverted to go from gain to attenuation.
  metrics->echo_return_loss = -10.0 * std::log10(aec_state_.ErlTimeDomain());
  // ERLE is tracked in log2 units; convert to dB for reporting.
  metrics->echo_return_loss_enhancement =
      Log2TodB(aec_state_.FullBandErleLog2());
}
|
||||
|
||||
// Removes the echo from one block of the capture signal, in place in
// `capture`. Pipeline: handle echo-path changes -> analyze render -> linear
// subtraction -> spectra -> AEC state update -> residual echo estimation ->
// suppression gain + filtering with comfort noise. If `linear_output` is
// non-null, the linear filter output is also copied out.
void EchoRemoverImpl::ProcessCapture(
    EchoPathVariability echo_path_variability,
    bool capture_signal_saturation,
    const absl::optional<DelayEstimate>& external_delay,
    RenderBuffer* render_buffer,
    Block* linear_output,
    Block* capture) {
  ++block_counter_;
  const Block& x = render_buffer->GetBlock(0);
  Block* y = capture;
  RTC_DCHECK(render_buffer);
  RTC_DCHECK(y);
  RTC_DCHECK_EQ(x.NumBands(), NumBandsForRate(sample_rate_hz_));
  RTC_DCHECK_EQ(y->NumBands(), NumBandsForRate(sample_rate_hz_));
  RTC_DCHECK_EQ(x.NumChannels(), num_render_channels_);
  RTC_DCHECK_EQ(y->NumChannels(), num_capture_channels_);

  // Stack allocated data to use when the number of channels is low.
  std::array<std::array<float, kFftLengthBy2>, kMaxNumChannelsOnStack> e_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      Y2_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      E2_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      R2_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      R2_unbounded_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      S2_linear_stack;
  std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
  std::array<FftData, kMaxNumChannelsOnStack> E_stack;
  std::array<FftData, kMaxNumChannelsOnStack> comfort_noise_stack;
  std::array<FftData, kMaxNumChannelsOnStack> high_band_comfort_noise_stack;
  std::array<SubtractorOutput, kMaxNumChannelsOnStack> subtractor_output_stack;

  // Views over the per-channel working buffers; initially aimed at the stack
  // arrays and re-aimed at the heap scratch below when needed.
  rtc::ArrayView<std::array<float, kFftLengthBy2>> e(e_stack.data(),
                                                     num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2(
      Y2_stack.data(), num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2(
      E2_stack.data(), num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
      R2_stack.data(), num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
      R2_unbounded_stack.data(), num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
      S2_linear_stack.data(), num_capture_channels_);
  rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
  rtc::ArrayView<FftData> E(E_stack.data(), num_capture_channels_);
  rtc::ArrayView<FftData> comfort_noise(comfort_noise_stack.data(),
                                        num_capture_channels_);
  rtc::ArrayView<FftData> high_band_comfort_noise(
      high_band_comfort_noise_stack.data(), num_capture_channels_);
  rtc::ArrayView<SubtractorOutput> subtractor_output(
      subtractor_output_stack.data(), num_capture_channels_);
  if (NumChannelsOnHeap(num_capture_channels_) > 0) {
    // If the stack-allocated space is too small, use the heap for storing the
    // microphone data.
    e = rtc::ArrayView<std::array<float, kFftLengthBy2>>(e_heap_.data(),
                                                         num_capture_channels_);
    Y2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        Y2_heap_.data(), num_capture_channels_);
    E2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        E2_heap_.data(), num_capture_channels_);
    R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        R2_heap_.data(), num_capture_channels_);
    R2_unbounded = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        R2_unbounded_heap_.data(), num_capture_channels_);
    S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        S2_linear_heap_.data(), num_capture_channels_);
    Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_);
    E = rtc::ArrayView<FftData>(E_heap_.data(), num_capture_channels_);
    comfort_noise = rtc::ArrayView<FftData>(comfort_noise_heap_.data(),
                                            num_capture_channels_);
    high_band_comfort_noise = rtc::ArrayView<FftData>(
        high_band_comfort_noise_heap_.data(), num_capture_channels_);
    subtractor_output = rtc::ArrayView<SubtractorOutput>(
        subtractor_output_heap_.data(), num_capture_channels_);
  }

  data_dumper_->DumpWav("aec3_echo_remover_capture_input",
                        y->View(/*band=*/0, /*channel=*/0), 16000, 1);
  data_dumper_->DumpWav("aec3_echo_remover_render_input",
                        x.View(/*band=*/0, /*channel=*/0), 16000, 1);
  data_dumper_->DumpRaw("aec3_echo_remover_capture_input",
                        y->View(/*band=*/0, /*channel=*/0));
  data_dumper_->DumpRaw("aec3_echo_remover_render_input",
                        x.View(/*band=*/0, /*channel=*/0));

  aec_state_.UpdateCaptureSaturation(capture_signal_saturation);

  if (echo_path_variability.AudioPathChanged()) {
    // Ensure that the gain change is only acted on once per frame.
    if (echo_path_variability.gain_change) {
      if (gain_change_hangover_ == 0) {
        constexpr int kMaxBlocksPerFrame = 3;
        gain_change_hangover_ = kMaxBlocksPerFrame;
        rtc::LoggingSeverity log_level =
            config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
                                                       : rtc::LS_VERBOSE;
        RTC_LOG_V(log_level)
            << "Gain change detected at block " << block_counter_;
      } else {
        // Within the hangover window: suppress the repeated gain-change flag.
        echo_path_variability.gain_change = false;
      }
    }

    subtractor_.HandleEchoPathChange(echo_path_variability);
    aec_state_.HandleEchoPathChange(echo_path_variability);

    if (echo_path_variability.delay_change !=
        EchoPathVariability::DelayAdjustment::kNone) {
      suppression_gain_.SetInitialState(true);
    }
  }
  if (gain_change_hangover_ > 0) {
    --gain_change_hangover_;
  }

  // Analyze the render signal.
  render_signal_analyzer_.Update(*render_buffer,
                                 aec_state_.MinDirectPathFilterDelay());

  // State transition.
  if (aec_state_.TransitionTriggered()) {
    subtractor_.ExitInitialState();
    suppression_gain_.SetInitialState(false);
  }

  // Perform linear echo cancellation.
  subtractor_.Process(*render_buffer, *y, render_signal_analyzer_, aec_state_,
                      subtractor_output);

  // Compute spectra.
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    FormLinearFilterOutput(subtractor_output[ch], e[ch]);
    WindowedPaddedFft(fft_, y->View(/*band=*/0, ch), y_old_[ch], &Y[ch]);
    WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]);
    LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]);
    Y[ch].Spectrum(optimization_, Y2[ch]);
    E[ch].Spectrum(optimization_, E2[ch]);
  }

  // Optionally return the linear filter output.
  if (linear_output) {
    RTC_DCHECK_GE(1, linear_output->NumBands());
    RTC_DCHECK_EQ(num_capture_channels_, linear_output->NumChannels());
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      std::copy(e[ch].begin(), e[ch].end(),
                linear_output->begin(/*band=*/0, ch));
    }
  }

  // Update the AEC state information.
  aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponses(),
                    subtractor_.FilterImpulseResponses(), *render_buffer, E2,
                    Y2, subtractor_output);

  // Choose the linear output.
  const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;

  data_dumper_->DumpWav("aec3_output_linear",
                        y->View(/*band=*/0, /*channel=*/0), 16000, 1);
  data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);

  // Estimate the comfort noise.
  cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
               high_band_comfort_noise);

  // Only do the below processing if the output of the audio processing module
  // is used.
  std::array<float, kFftLengthBy2Plus1> G;
  if (capture_output_used_) {
    // Estimate the residual echo power.
    residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
                                      suppression_gain_.IsDominantNearend(), R2,
                                      R2_unbounded);

    // Suppressor nearend estimate.
    if (aec_state_.UsableLinearEstimate()) {
      // E2 is bound by Y2.
      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
        std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
                       E2[ch].begin(),
                       [](float a, float b) { return std::min(a, b); });
      }
    }
    const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;

    // Suppressor echo estimate.
    const auto& echo_spectrum =
        aec_state_.UsableLinearEstimate() ? S2_linear : R2;

    // Determine if the suppressor should assume clock drift.
    const bool clock_drift = config_.echo_removal_control.has_clock_drift ||
                             echo_path_variability.clock_drift;

    // Compute preferred gains.
    float high_bands_gain;
    suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded,
                              cng_.NoiseSpectrum(), render_signal_analyzer_,
                              aec_state_, x, clock_drift, &high_bands_gain, &G);

    suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
                                  high_bands_gain, Y_fft, y);

  } else {
    G.fill(0.f);
  }

  // Update the metrics.
  metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G);

  // Debug outputs for the purpose of development and analysis.
  data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
                        &subtractor_output[0].s_refined[0], 16000, 1);
  data_dumper_->DumpRaw("aec3_output", y->View(/*band=*/0, /*channel=*/0));
  data_dumper_->DumpRaw("aec3_narrow_render",
                        render_signal_analyzer_.NarrowPeakBand() ? 1 : 0);
  data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]);
  data_dumper_->DumpRaw("aec3_suppressor_gain", G);
  data_dumper_->DumpWav("aec3_output", y->View(/*band=*/0, /*channel=*/0),
                        16000, 1);
  data_dumper_->DumpRaw("aec3_using_subtractor_output[0]",
                        aec_state_.UseLinearFilterOutput() ? 1 : 0);
  data_dumper_->DumpRaw("aec3_E2", E2[0]);
  data_dumper_->DumpRaw("aec3_S2_linear", S2_linear[0]);
  data_dumper_->DumpRaw("aec3_Y2", Y2[0]);
  data_dumper_->DumpRaw(
      "aec3_X2", render_buffer->Spectrum(
                     aec_state_.MinDirectPathFilterDelay())[/*channel=*/0]);
  data_dumper_->DumpRaw("aec3_R2", R2[0]);
  data_dumper_->DumpRaw("aec3_filter_delay",
                        aec_state_.MinDirectPathFilterDelay());
  data_dumper_->DumpRaw("aec3_capture_saturation",
                        aec_state_.SaturatedCapture() ? 1 : 0);
}
|
||||
|
||||
// Forms the linear filter output from the refined and coarse subtractor
// outputs: picks whichever is currently most appropriate and crossfades
// (via SignalTransition) whenever the selection changes between blocks.
void EchoRemoverImpl::FormLinearFilterOutput(
    const SubtractorOutput& subtractor_output,
    rtc::ArrayView<float> output) {
  RTC_DCHECK_EQ(subtractor_output.e_refined.size(), output.size());
  RTC_DCHECK_EQ(subtractor_output.e_coarse.size(), output.size());
  bool use_refined_output = true;
  if (use_coarse_filter_output_) {
    // As the output of the refined adaptive filter generally should be better
    // than the coarse filter output, add a margin and threshold for when
    // choosing the coarse filter output.
    if (subtractor_output.e2_coarse < 0.9f * subtractor_output.e2_refined &&
        subtractor_output.y2 > 30.f * 30.f * kBlockSize &&
        (subtractor_output.s2_refined > 60.f * 60.f * kBlockSize ||
         subtractor_output.s2_coarse > 60.f * 60.f * kBlockSize)) {
      use_refined_output = false;
    } else {
      // If the refined filter is diverged, choose the filter output that has
      // the lowest power.
      if (subtractor_output.e2_coarse < subtractor_output.e2_refined &&
          subtractor_output.y2 < subtractor_output.e2_refined) {
        use_refined_output = false;
      }
    }
  }

  // Crossfade from last block's selection to this block's selection; when the
  // selection is unchanged, SignalTransition degenerates to a plain copy.
  SignalTransition(refined_filter_output_last_selected_
                       ? subtractor_output.e_refined
                       : subtractor_output.e_coarse,
                   use_refined_output ? subtractor_output.e_refined
                                      : subtractor_output.e_coarse,
                   output);
  refined_filter_output_last_selected_ = use_refined_output;
}
|
||||
|
||||
}  // namespace

// Factory for the concrete EchoRemover implementation. Ownership of the
// returned object is transferred to the caller.
EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config,
                                 int sample_rate_hz,
                                 size_t num_render_channels,
                                 size_t num_capture_channels) {
  return new EchoRemoverImpl(config, sample_rate_hz, num_render_channels,
                             num_capture_channels);
}

}  // namespace webrtc
|
62
VocieProcess/modules/audio_processing/aec3/echo_remover.h
Normal file
62
VocieProcess/modules/audio_processing/aec3/echo_remover.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_

#include <vector>

#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/block.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/render_buffer.h"

namespace webrtc {

// Interface for removing the echo from the capture signal.
class EchoRemover {
 public:
  // Creates the concrete implementation; ownership of the returned object is
  // transferred to the caller.
  static EchoRemover* Create(const EchoCanceller3Config& config,
                             int sample_rate_hz,
                             size_t num_render_channels,
                             size_t num_capture_channels);
  virtual ~EchoRemover() = default;

  // Get current metrics.
  virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;

  // Removes the echo from a block of samples from the capture signal. The
  // supplied render signal is assumed to be pre-aligned with the capture
  // signal.
  virtual void ProcessCapture(
      EchoPathVariability echo_path_variability,
      bool capture_signal_saturation,
      const absl::optional<DelayEstimate>& external_delay,
      RenderBuffer* render_buffer,
      Block* linear_output,
      Block* capture) = 0;

  // Updates the status on whether echo leakage is detected in the output of the
  // echo remover.
  virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;

  // Specifies whether the capture output will be used. The purpose of this is
  // to allow the echo remover to deactivate some of the processing when the
  // resulting output is anyway not used, for instance when the endpoint is
  // muted.
  virtual void SetCaptureOutputUsage(bool capture_output_used) = 0;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
|
@ -0,0 +1,165 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
#ifdef max
|
||||
#undef max
|
||||
#endif
|
||||
|
||||
#ifdef min
|
||||
#undef min
|
||||
#endif
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Default-constructs the metric with zeroed sum, floor and ceiling.
EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {}
EchoRemoverMetrics::DbMetric::DbMetric(float sum_value,
                                       float floor_value,
                                       float ceil_value)
    : sum_value(sum_value), floor_value(floor_value), ceil_value(ceil_value) {}

// Accumulates `value` into the sum and widens the observed floor/ceiling.
void EchoRemoverMetrics::DbMetric::Update(float value) {
  sum_value += value;
  floor_value = std::min(floor_value, value);
  ceil_value = std::max(ceil_value, value);
}
|
||||
|
||||
// Like Update(), but the sum is replaced by the latest value (no
// accumulation) while the floor/ceiling still track the extremes seen.
void EchoRemoverMetrics::DbMetric::UpdateInstant(float value) {
  sum_value = value;
  floor_value = std::min(floor_value, value);
  ceil_value = std::max(ceil_value, value);
}
|
||||
|
||||
EchoRemoverMetrics::EchoRemoverMetrics() {
  ResetMetrics();
}

// Re-initializes the metrics state for a new collection interval. The ERL and
// ERLE DbMetrics are seeded with different floor/ceiling start values —
// presumably chosen so subsequent updates saturate towards the intended
// extremes for each metric; confirm against the reporting code before
// changing them.
void EchoRemoverMetrics::ResetMetrics() {
  erl_time_domain_ = DbMetric(0.f, 10000.f, 0.000f);
  erle_time_domain_ = DbMetric(0.f, 0.f, 1000.f);
  saturated_capture_ = false;
}
|
||||
|
||||
void EchoRemoverMetrics::Update(
|
||||
const AecState& aec_state,
|
||||
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppressor_gain) {
|
||||
metrics_reported_ = false;
|
||||
if (++block_counter_ <= kMetricsCollectionBlocks) {
|
||||
erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain());
|
||||
erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2());
|
||||
saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture();
|
||||
} else {
|
||||
// Report the metrics over several frames in order to lower the impact of
|
||||
// the logarithms involved on the computational complexity.
|
||||
switch (block_counter_) {
|
||||
case kMetricsCollectionBlocks + 1:
|
||||
RTC_HISTOGRAM_BOOLEAN(
|
||||
"WebRTC.Audio.EchoCanceller.UsableLinearEstimate",
|
||||
static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0));
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay",
|
||||
aec_state.MinDirectPathFilterDelay(), 0, 30,
|
||||
31);
|
||||
RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation",
|
||||
static_cast<int>(saturated_capture_ ? 1 : 0));
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 2:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erl.Value",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
erl_time_domain_.sum_value),
|
||||
0, 59, 30);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erl.Max",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
erl_time_domain_.ceil_value),
|
||||
0, 59, 30);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erl.Min",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
erl_time_domain_.floor_value),
|
||||
0, 59, 30);
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 3:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erle.Value",
|
||||
aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
|
||||
erle_time_domain_.sum_value),
|
||||
0, 19, 20);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erle.Max",
|
||||
aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
|
||||
erle_time_domain_.ceil_value),
|
||||
0, 19, 20);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erle.Min",
|
||||
aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
|
||||
erle_time_domain_.floor_value),
|
||||
0, 19, 20);
|
||||
metrics_reported_ = true;
|
||||
RTC_DCHECK_EQ(kMetricsReportingIntervalBlocks, block_counter_);
|
||||
block_counter_ = 0;
|
||||
ResetMetrics();
|
||||
break;
|
||||
default:
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
|
||||
std::array<EchoRemoverMetrics::DbMetric, 2>* statistic) {
|
||||
RTC_DCHECK(statistic);
|
||||
// Truncation is intended in the band width computation.
|
||||
constexpr int kNumBands = 2;
|
||||
constexpr int kBandWidth = 65 / kNumBands;
|
||||
constexpr float kOneByBandWidth = 1.f / kBandWidth;
|
||||
RTC_DCHECK_EQ(kNumBands, statistic->size());
|
||||
RTC_DCHECK_EQ(65, value.size());
|
||||
for (size_t k = 0; k < statistic->size(); ++k) {
|
||||
float average_band =
|
||||
std::accumulate(value.begin() + kBandWidth * k,
|
||||
value.begin() + kBandWidth * (k + 1), 0.f) *
|
||||
kOneByBandWidth;
|
||||
(*statistic)[k].Update(average_band);
|
||||
}
|
||||
}
|
||||
|
||||
int TransformDbMetricForReporting(bool negate,
|
||||
float min_value,
|
||||
float max_value,
|
||||
float offset,
|
||||
float scaling,
|
||||
float value) {
|
||||
float new_value = 10.f * std::log10(value * scaling + 1e-10f) + offset;
|
||||
if (negate) {
|
||||
new_value = -new_value;
|
||||
}
|
||||
return static_cast<int>(rtc::SafeClamp(new_value, min_value, max_value));
|
||||
}
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,78 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_

#include <array>

#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec_state.h"

namespace webrtc {

// Handles the reporting of metrics for the echo remover.
class EchoRemoverMetrics {
 public:
  // Accumulator for a metric: a running sum together with the observed
  // floor (minimum) and ceiling (maximum).
  struct DbMetric {
    DbMetric();
    DbMetric(float sum_value, float floor_value, float ceil_value);
    // Adds `value` to the sum and updates the floor/ceiling.
    void Update(float value);
    // Overwrites the sum with `value` and updates the floor/ceiling.
    void UpdateInstant(float value);
    float sum_value;    // Accumulated (or most recent) value.
    float floor_value;  // Minimum value observed.
    float ceil_value;   // Maximum value observed.
  };

  EchoRemoverMetrics();

  EchoRemoverMetrics(const EchoRemoverMetrics&) = delete;
  EchoRemoverMetrics& operator=(const EchoRemoverMetrics&) = delete;

  // Updates the metric with new data.
  void Update(
      const AecState& aec_state,
      const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
      const std::array<float, kFftLengthBy2Plus1>& suppressor_gain);

  // Returns true if the metrics have just been reported, otherwise false.
  bool MetricsReported() { return metrics_reported_; }

 private:
  // Resets the metrics.
  void ResetMetrics();

  int block_counter_ = 0;
  DbMetric erl_time_domain_;
  DbMetric erle_time_domain_;
  bool saturated_capture_ = false;
  bool metrics_reported_ = false;
};

namespace aec3 {

// Updates a banded metric of type DbMetric with the values in the supplied
// array.
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
                    std::array<EchoRemoverMetrics::DbMetric, 2>* statistic);

// Transforms a DbMetric from the linear domain into the logarithmic domain.
int TransformDbMetricForReporting(bool negate,
                                  float min_value,
                                  float max_value,
                                  float offset,
                                  float scaling,
                                  float value);

}  // namespace aec3

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
|
146
VocieProcess/modules/audio_processing/aec3/erl_estimator.cc
Normal file
146
VocieProcess/modules/audio_processing/aec3/erl_estimator.cc
Normal file
@ -0,0 +1,146 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/aec3/erl_estimator.h"

#include <algorithm>
#include <numeric>

#include "rtc_base/checks.h"

namespace webrtc {

namespace {

// Bounds for the per-bin and fullband ERL estimates.
constexpr float kMinErl = 0.01f;
constexpr float kMaxErl = 1000.f;

}  // namespace

ErlEstimator::ErlEstimator(size_t startup_phase_length_blocks_)
    : startup_phase_length_blocks__(startup_phase_length_blocks_) {
  erl_.fill(kMaxErl);
  hold_counters_.fill(0);
  erl_time_domain_ = kMaxErl;
  hold_counter_time_domain_ = 0;
}

ErlEstimator::~ErlEstimator() = default;

// Restarts the startup phase; the spectral estimates themselves are kept.
void ErlEstimator::Reset() {
  blocks_since_reset_ = 0;
}

// Updates the per-bin and fullband ERL estimates from the render and capture
// spectra, using only channels whose filters have converged.
void ErlEstimator::Update(
    const std::vector<bool>& converged_filters,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> render_spectra,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        capture_spectra) {
  const size_t num_capture_channels = converged_filters.size();
  RTC_DCHECK_EQ(capture_spectra.size(), num_capture_channels);

  // Corresponds to WGN of power -46 dBFS.
  constexpr float kX2Min = 44015068.0f;

  const auto first_converged_iter =
      std::find(converged_filters.begin(), converged_filters.end(), true);
  const bool any_filter_converged =
      first_converged_iter != converged_filters.end();

  // No update during the startup phase or without a converged filter.
  if (++blocks_since_reset_ < startup_phase_length_blocks__ ||
      !any_filter_converged) {
    return;
  }

  // Use the maximum spectrum across capture and the maximum across render.
  std::array<float, kFftLengthBy2Plus1> max_capture_spectrum_data;
  std::array<float, kFftLengthBy2Plus1> max_capture_spectrum =
      capture_spectra[/*channel=*/0];
  if (num_capture_channels > 1) {
    // Initialize using the first channel with a converged filter.
    const size_t first_converged =
        std::distance(converged_filters.begin(), first_converged_iter);
    RTC_DCHECK_GE(first_converged, 0);
    RTC_DCHECK_LT(first_converged, num_capture_channels);
    max_capture_spectrum_data = capture_spectra[first_converged];

    for (size_t ch = first_converged + 1; ch < num_capture_channels; ++ch) {
      if (!converged_filters[ch]) {
        continue;
      }
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        max_capture_spectrum_data[k] =
            std::max(max_capture_spectrum_data[k], capture_spectra[ch][k]);
      }
    }
    max_capture_spectrum = max_capture_spectrum_data;
  }

  const size_t num_render_channels = render_spectra.size();
  std::array<float, kFftLengthBy2Plus1> max_render_spectrum_data;
  rtc::ArrayView<const float, kFftLengthBy2Plus1> max_render_spectrum =
      render_spectra[/*channel=*/0];
  if (num_render_channels > 1) {
    std::copy(render_spectra[0].begin(), render_spectra[0].end(),
              max_render_spectrum_data.begin());
    for (size_t ch = 1; ch < num_render_channels; ++ch) {
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        max_render_spectrum_data[k] =
            std::max(max_render_spectrum_data[k], render_spectra[ch][k]);
      }
    }
    max_render_spectrum = max_render_spectrum_data;
  }

  const auto& X2 = max_render_spectrum;
  const auto& Y2 = max_capture_spectrum;

  // Update the estimates in a maximum statistics manner: decreases are
  // tracked quickly and latched for 1000 blocks via the hold counters.
  for (size_t k = 1; k < kFftLengthBy2; ++k) {
    if (X2[k] > kX2Min) {
      const float new_erl = Y2[k] / X2[k];
      if (new_erl < erl_[k]) {
        hold_counters_[k - 1] = 1000;
        erl_[k] += 0.1f * (new_erl - erl_[k]);
        erl_[k] = std::max(erl_[k], kMinErl);
      }
    }
  }

  // When a hold counter expires, let the estimate grow back (doubling),
  // capped at kMaxErl.
  std::for_each(hold_counters_.begin(), hold_counters_.end(),
                [](int& a) { --a; });
  std::transform(hold_counters_.begin(), hold_counters_.end(), erl_.begin() + 1,
                 erl_.begin() + 1, [](int a, float b) {
                   return a > 0 ? b : std::min(kMaxErl, 2.f * b);
                 });

  // Extrapolate the edge bins from their neighbors.
  erl_[0] = erl_[1];
  erl_[kFftLengthBy2] = erl_[kFftLengthBy2 - 1];

  // Compute ERL over all frequency bins.
  const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);

  if (X2_sum > kX2Min * X2.size()) {
    const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f);
    const float new_erl = Y2_sum / X2_sum;
    if (new_erl < erl_time_domain_) {
      hold_counter_time_domain_ = 1000;
      erl_time_domain_ += 0.1f * (new_erl - erl_time_domain_);
      erl_time_domain_ = std::max(erl_time_domain_, kMinErl);
    }
  }

  --hold_counter_time_domain_;
  erl_time_domain_ = (hold_counter_time_domain_ > 0)
                         ? erl_time_domain_
                         : std::min(kMaxErl, 2.f * erl_time_domain_);
}

}  // namespace webrtc
|
58
VocieProcess/modules/audio_processing/aec3/erl_estimator.h
Normal file
58
VocieProcess/modules/audio_processing/aec3/erl_estimator.h
Normal file
@ -0,0 +1,58 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_

#include <stddef.h>

#include <array>
#include <vector>

#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"

namespace webrtc {

// Estimates the echo return loss based on the signal spectra.
class ErlEstimator {
 public:
  explicit ErlEstimator(size_t startup_phase_length_blocks_);
  ~ErlEstimator();

  ErlEstimator(const ErlEstimator&) = delete;
  ErlEstimator& operator=(const ErlEstimator&) = delete;

  // Resets the ERL estimation.
  void Reset();

  // Updates the ERL estimate.
  void Update(const std::vector<bool>& converged_filters,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  render_spectra,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  capture_spectra);

  // Returns the most recent ERL estimate.
  const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; }
  float ErlTimeDomain() const { return erl_time_domain_; }

 private:
  // NOTE(review): identifiers containing a double underscore are reserved in
  // C++; consider renaming this member (together with its use in the .cc
  // file) in a follow-up.
  const size_t startup_phase_length_blocks__;
  std::array<float, kFftLengthBy2Plus1> erl_;  // Per-bin ERL estimate.
  std::array<int, kFftLengthBy2Minus1> hold_counters_;  // Decay latches.
  float erl_time_domain_;  // Fullband ERL estimate.
  int hold_counter_time_domain_;
  size_t blocks_since_reset_ = 0;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
|
89
VocieProcess/modules/audio_processing/aec3/erle_estimator.cc
Normal file
89
VocieProcess/modules/audio_processing/aec3/erle_estimator.cc
Normal file
@ -0,0 +1,89 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/aec3/erle_estimator.h"

#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"

namespace webrtc {

ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks,
                             const EchoCanceller3Config& config,
                             size_t num_capture_channels)
    : startup_phase_length_blocks_(startup_phase_length_blocks),
      fullband_erle_estimator_(config.erle, num_capture_channels),
      subband_erle_estimator_(config, num_capture_channels) {
  // The signal-dependent estimator is only instantiated when the config asks
  // for more than one section.
  if (config.erle.num_sections > 1) {
    signal_dependent_erle_estimator_ =
        std::make_unique<SignalDependentErleEstimator>(config,
                                                       num_capture_channels);
  }
  Reset(true);
}

ErleEstimator::~ErleEstimator() = default;

// Resets all sub-estimators; `delay_change` additionally restarts the
// startup phase.
void ErleEstimator::Reset(bool delay_change) {
  fullband_erle_estimator_.Reset();
  subband_erle_estimator_.Reset();
  if (signal_dependent_erle_estimator_) {
    signal_dependent_erle_estimator_->Reset();
  }
  if (delay_change) {
    blocks_since_reset_ = 0;
  }
}

// Feeds the latest spectra to the sub-estimators. No updates are made during
// the startup phase.
void ErleEstimator::Update(
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
        filter_frequency_responses,
    rtc::ArrayView<const float, kFftLengthBy2Plus1>
        avg_render_spectrum_with_reverb,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> capture_spectra,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        subtractor_spectra,
    const std::vector<bool>& converged_filters) {
  RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
                capture_spectra.size());
  RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
                subtractor_spectra.size());
  const auto& X2_reverb = avg_render_spectrum_with_reverb;
  const auto& Y2 = capture_spectra;
  const auto& E2 = subtractor_spectra;

  if (++blocks_since_reset_ < startup_phase_length_blocks_) {
    return;
  }

  subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);

  if (signal_dependent_erle_estimator_) {
    signal_dependent_erle_estimator_->Update(
        render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
        subband_erle_estimator_.Erle(/*onset_compensated=*/false),
        subband_erle_estimator_.Erle(/*onset_compensated=*/true),
        converged_filters);
  }

  fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
}

// Dumps the internal state of all sub-estimators for debugging.
void ErleEstimator::Dump(
    const std::unique_ptr<ApmDataDumper>& data_dumper) const {
  fullband_erle_estimator_.Dump(data_dumper);
  subband_erle_estimator_.Dump(data_dumper);
  if (signal_dependent_erle_estimator_) {
    signal_dependent_erle_estimator_->Dump(data_dumper);
  }
}

}  // namespace webrtc
|
112
VocieProcess/modules/audio_processing/aec3/erle_estimator.h
Normal file
112
VocieProcess/modules/audio_processing/aec3/erle_estimator.h
Normal file
@ -0,0 +1,112 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_

#include <stddef.h>

#include <array>
#include <memory>
#include <vector>

#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/fullband_erle_estimator.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h"
#include "modules/audio_processing/aec3/subband_erle_estimator.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"

namespace webrtc {

// Estimates the echo return loss enhancement. One estimate is done per subband
// and another one is done using the aggreation of energy over all the subbands.
class ErleEstimator {
 public:
  ErleEstimator(size_t startup_phase_length_blocks,
                const EchoCanceller3Config& config,
                size_t num_capture_channels);
  ~ErleEstimator();

  // Resets the fullband ERLE estimator and the subbands ERLE estimators.
  void Reset(bool delay_change);

  // Updates the ERLE estimates.
  void Update(
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          filter_frequency_responses,
      rtc::ArrayView<const float, kFftLengthBy2Plus1>
          avg_render_spectrum_with_reverb,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          capture_spectra,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          subtractor_spectra,
      const std::vector<bool>& converged_filters);

  // Returns the most recent subband ERLE estimates.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
      bool onset_compensated) const {
    return signal_dependent_erle_estimator_
               ? signal_dependent_erle_estimator_->Erle(onset_compensated)
               : subband_erle_estimator_.Erle(onset_compensated);
  }

  // Returns the non-capped subband ERLE.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
      const {
    // Unbounded ERLE is only used with the subband erle estimator where the
    // ERLE is often capped at low values. When the signal dependent ERLE
    // estimator is used the capped ERLE is returned.
    return !signal_dependent_erle_estimator_
               ? subband_erle_estimator_.ErleUnbounded()
               : signal_dependent_erle_estimator_->Erle(
                     /*onset_compensated=*/false);
  }

  // Returns the subband ERLE that are estimated during onsets (only used for
  // testing).
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
      const {
    return subband_erle_estimator_.ErleDuringOnsets();
  }

  // Returns the fullband ERLE estimate.
  float FullbandErleLog2() const {
    return fullband_erle_estimator_.FullbandErleLog2();
  }

  // Returns an estimation of the current linear filter quality based on the
  // current and past fullband ERLE estimates. The returned value is a float
  // vector with content between 0 and 1 where 1 indicates that, at this current
  // time instant, the linear filter is reaching its maximum subtraction
  // performance.
  rtc::ArrayView<const absl::optional<float>> GetInstLinearQualityEstimates()
      const {
    return fullband_erle_estimator_.GetInstLinearQualityEstimates();
  }

  void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;

 private:
  const size_t startup_phase_length_blocks_;
  FullBandErleEstimator fullband_erle_estimator_;
  SubbandErleEstimator subband_erle_estimator_;
  // Only instantiated when config.erle.num_sections > 1.
  std::unique_ptr<SignalDependentErleEstimator>
      signal_dependent_erle_estimator_;
  size_t blocks_since_reset_ = 0;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
|
27
VocieProcess/modules/audio_processing/aec3/fft_buffer.cc
Normal file
27
VocieProcess/modules/audio_processing/aec3/fft_buffer.cc
Normal file
@ -0,0 +1,27 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/aec3/fft_buffer.h"

namespace webrtc {

// Allocates a size x num_channels circular buffer and zero-initializes every
// FftData element.
FftBuffer::FftBuffer(size_t size, size_t num_channels)
    : size(static_cast<int>(size)),
      buffer(size, std::vector<FftData>(num_channels)) {
  for (auto& block : buffer) {
    for (auto& channel_fft_data : block) {
      channel_fft_data.Clear();
    }
  }
}

FftBuffer::~FftBuffer() = default;

}  // namespace webrtc
|
60
VocieProcess/modules/audio_processing/aec3/fft_buffer.h
Normal file
60
VocieProcess/modules/audio_processing/aec3/fft_buffer.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_

#include <stddef.h>

#include <vector>

#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/checks.h"

namespace webrtc {

// Struct for bundling a circular buffer of FftData objects together with the
// read and write indices.
struct FftBuffer {
  FftBuffer(size_t size, size_t num_channels);
  ~FftBuffer();

  // Returns `index` advanced by one, wrapping around at `size`.
  int IncIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index < size - 1 ? index + 1 : 0;
  }

  // Returns `index` moved back by one, wrapping around at zero.
  int DecIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index > 0 ? index - 1 : size - 1;
  }

  // Returns `index` shifted by `offset` modulo `size`.
  int OffsetIndex(int index, int offset) const {
    RTC_DCHECK_GE(buffer.size(), offset);
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return (size + index + offset) % size;
  }

  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
  void IncWriteIndex() { write = IncIndex(write); }
  void DecWriteIndex() { write = DecIndex(write); }
  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
  void IncReadIndex() { read = IncIndex(read); }
  void DecReadIndex() { read = DecIndex(read); }

  const int size;                            // Number of blocks in the buffer.
  std::vector<std::vector<FftData>> buffer;  // [block][channel] storage.
  int write = 0;
  int read = 0;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
|
104
VocieProcess/modules/audio_processing/aec3/fft_data.h
Normal file
104
VocieProcess/modules/audio_processing/aec3/fft_data.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_

// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"

#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <array>

#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"

namespace webrtc {

// Struct that holds imaginary data produced from 128 point real-valued FFTs.
struct FftData {
  // Copies the data in src. The im[0] and im[kFftLengthBy2] bins are forced
  // to zero, as required for the spectrum of a real-valued signal.
  void Assign(const FftData& src) {
    std::copy(src.re.begin(), src.re.end(), re.begin());
    std::copy(src.im.begin(), src.im.end(), im.begin());
    im[0] = im[kFftLengthBy2] = 0;
  }

  // Sets all real and imaginary components to zero.
  void Clear() {
    re.fill(0.f);
    im.fill(0.f);
  }

  // Computes the power spectrum of the data (AVX2 implementation, defined in
  // a separate translation unit).
  void SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const;

  // Computes the power spectrum of the data, i.e. re*re + im*im per bin.
  void Spectrum(Aec3Optimization optimization,
                rtc::ArrayView<float> power_spectrum) const {
    RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
    switch (optimization) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
      case Aec3Optimization::kSse2: {
        constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
        constexpr int kLimit = kNumFourBinBands * 4;
        // Process four bins at a time with SSE2; the final (Nyquist) bin is
        // handled separately below.
        for (size_t k = 0; k < kLimit; k += 4) {
          const __m128 r = _mm_loadu_ps(&re[k]);
          const __m128 i = _mm_loadu_ps(&im[k]);
          const __m128 ii = _mm_mul_ps(i, i);
          const __m128 rr = _mm_mul_ps(r, r);
          const __m128 rrii = _mm_add_ps(rr, ii);
          _mm_storeu_ps(&power_spectrum[k], rrii);
        }
        power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
                                        im[kFftLengthBy2] * im[kFftLengthBy2];
      } break;
      case Aec3Optimization::kAvx2:
        SpectrumAVX2(power_spectrum);
        break;
#endif
      default:
        std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(),
                       [](float a, float b) { return a * a + b * b; });
    }
  }

  // Copy the data from an interleaved array. The packed layout stores
  // re[0] and re[kFftLengthBy2] in the first two slots, followed by
  // interleaved (re, im) pairs.
  void CopyFromPackedArray(const std::array<float, kFftLength>& v) {
    re[0] = v[0];
    re[kFftLengthBy2] = v[1];
    im[0] = im[kFftLengthBy2] = 0;
    for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
      re[k] = v[j++];
      im[k] = v[j++];
    }
  }

  // Copies the data into an interleaved array (inverse of
  // CopyFromPackedArray).
  void CopyToPackedArray(std::array<float, kFftLength>* v) const {
    RTC_DCHECK(v);
    (*v)[0] = re[0];
    (*v)[1] = re[kFftLengthBy2];
    for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
      (*v)[j++] = re[k];
      (*v)[j++] = im[k];
    }
  }

  std::array<float, kFftLengthBy2Plus1> re;  // Real parts.
  std::array<float, kFftLengthBy2Plus1> im;  // Imaginary parts.
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
|
289
VocieProcess/modules/audio_processing/aec3/filter_analyzer.cc
Normal file
289
VocieProcess/modules/audio_processing/aec3/filter_analyzer.cc
Normal file
@ -0,0 +1,289 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/filter_analyzer.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <numeric>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Returns the index of the sample with the largest squared magnitude in
// `filter_time_domain` within [start_sample, end_sample], seeded with the
// previous peak at `peak_index_in` (so the previous peak wins ties).
size_t FindPeakIndex(rtc::ArrayView<const float> filter_time_domain,
                     size_t peak_index_in,
                     size_t start_sample,
                     size_t end_sample) {
  size_t peak_index_out = peak_index_in;
  float max_h2 =
      filter_time_domain[peak_index_out] * filter_time_domain[peak_index_out];
  for (size_t k = start_sample; k <= end_sample; ++k) {
    float tmp = filter_time_domain[k] * filter_time_domain[k];
    if (tmp > max_h2) {
      peak_index_out = k;
      max_h2 = tmp;
    }
  }

  return peak_index_out;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Process-wide counter used to assign each FilterAnalyzer instance a unique
// id for data dumping (see the constructor's ApmDataDumper initialization).
std::atomic<int> FilterAnalyzer::instance_count_(0);
|
||||
|
||||
// Sets up per-channel analysis state. h_highpass_ is sized to the
// time-domain length of the refined adaptive filter and zero-initialized.
FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config,
                               size_t num_capture_channels)
    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
      // When set, the reported echo path gain is bounded from below in
      // UpdateFilterGain.
      bounded_erl_(config.ep_strength.bounded_erl),
      default_gain_(config.ep_strength.default_gain),
      // High-pass filtered copies of the adaptive filters, one per channel.
      h_highpass_(num_capture_channels,
                  std::vector<float>(
                      GetTimeDomainLength(config.filter.refined.length_blocks),
                      0.f)),
      filter_analysis_states_(num_capture_channels,
                              FilterAnalysisState(config)),
      filter_delays_blocks_(num_capture_channels, 0) {
  Reset();
}
|
||||
|
||||
// Defaulted; all members release their resources via RAII.
FilterAnalyzer::~FilterAnalyzer() = default;
|
||||
|
||||
void FilterAnalyzer::Reset() {
|
||||
blocks_since_reset_ = 0;
|
||||
ResetRegion();
|
||||
for (auto& state : filter_analysis_states_) {
|
||||
state.Reset(default_gain_);
|
||||
}
|
||||
std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(), 0);
|
||||
}
|
||||
|
||||
// Analyzes the next region of the adaptive filters and aggregates the
// per-channel results: the estimate is consistent if any channel is
// consistent, the reported gain is the maximum over channels, and the stored
// delay is the minimum over channels.
void FilterAnalyzer::Update(
    rtc::ArrayView<const std::vector<float>> filters_time_domain,
    const RenderBuffer& render_buffer,
    bool* any_filter_consistent,
    float* max_echo_path_gain) {
  RTC_DCHECK(any_filter_consistent);
  RTC_DCHECK(max_echo_path_gain);
  RTC_DCHECK_EQ(filters_time_domain.size(), filter_analysis_states_.size());
  RTC_DCHECK_EQ(filters_time_domain.size(), h_highpass_.size());

  ++blocks_since_reset_;
  SetRegionToAnalyze(filters_time_domain[0].size());
  AnalyzeRegion(filters_time_domain, render_buffer);

  // Fold the per-channel results into the aggregate outputs, seeded with
  // channel 0.
  const auto& first_state = filter_analysis_states_[0];
  bool consistent = first_state.consistent_estimate;
  float gain = first_state.gain;
  auto min_delay = filter_delays_blocks_[0];
  for (size_t ch = 1; ch < filters_time_domain.size(); ++ch) {
    const auto& state = filter_analysis_states_[ch];
    consistent = consistent || state.consistent_estimate;
    gain = std::max(gain, state.gain);
    min_delay = std::min(min_delay, filter_delays_blocks_[ch]);
  }

  *any_filter_consistent = consistent;
  *max_echo_path_gain = gain;
  min_filter_delay_blocks_ = min_delay;
}
|
||||
|
||||
// For each capture channel: high-pass filters the adaptive filter over the
// current region, locates the filter peak, derives the filter delay in
// blocks, updates the echo path gain estimate and runs the consistency
// detector.
void FilterAnalyzer::AnalyzeRegion(
    rtc::ArrayView<const std::vector<float>> filters_time_domain,
    const RenderBuffer& render_buffer) {
  // Preprocess the filter to avoid issues with low-frequency components in the
  // filter.
  PreProcessFilters(filters_time_domain);
  data_dumper_->DumpRaw("aec3_linear_filter_processed_td", h_highpass_[0]);

  constexpr float kOneByBlockSize = 1.f / kBlockSize;
  for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
    RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
    RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());

    auto& st_ch = filter_analysis_states_[ch];
    RTC_DCHECK_EQ(h_highpass_[ch].size(), filters_time_domain[ch].size());
    RTC_DCHECK_GT(h_highpass_[ch].size(), 0);
    // Clamp the previously found peak in case the filter length has shrunk.
    st_ch.peak_index = std::min(st_ch.peak_index, h_highpass_[ch].size() - 1);

    st_ch.peak_index =
        FindPeakIndex(h_highpass_[ch], st_ch.peak_index, region_.start_sample_,
                      region_.end_sample_);
    // The delay in blocks is the peak position divided by the block size.
    filter_delays_blocks_[ch] = st_ch.peak_index >> kBlockSizeLog2;
    UpdateFilterGain(h_highpass_[ch], &st_ch);
    st_ch.filter_length_blocks =
        filters_time_domain[ch].size() * kOneByBlockSize;

    // The render block is fetched at the estimated delay so the detector can
    // gate the consistency count on render activity.
    st_ch.consistent_estimate = st_ch.consistent_filter_detector.Detect(
        h_highpass_[ch], region_,
        render_buffer.GetBlock(-filter_delays_blocks_[ch]), st_ch.peak_index,
        filter_delays_blocks_[ch]);
  }
}
|
||||
|
||||
// Updates the echo path gain estimate for one channel from the magnitude of
// the filter's peak tap. Once the filter has had time to converge and is
// consistent, the gain tracks the peak directly; before that, a nonzero gain
// may only grow. With bounded ERL enabled, a nonzero gain is floored at 0.01.
void FilterAnalyzer::UpdateFilterGain(
    rtc::ArrayView<const float> filter_time_domain,
    FilterAnalysisState* st) {
  const float peak_abs = fabsf(filter_time_domain[st->peak_index]);
  const bool sufficient_time_to_converge =
      blocks_since_reset_ > 5 * kNumBlocksPerSecond;

  if (sufficient_time_to_converge && st->consistent_estimate) {
    st->gain = peak_abs;
  } else if (st->gain) {
    // TODO(peah): Verify whether this check against a float is ok.
    st->gain = std::max(st->gain, peak_abs);
  }

  if (bounded_erl_ && st->gain) {
    st->gain = std::max(st->gain, 0.01f);
  }
}
|
||||
|
||||
// Applies a short high-pass FIR filter to each channel's adaptive filter over
// the currently analyzed region, writing the result into h_highpass_. This
// suppresses low-frequency filter content before the peak/gain analysis.
void FilterAnalyzer::PreProcessFilters(
    rtc::ArrayView<const std::vector<float>> filters_time_domain) {
  for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
    RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
    RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());

    RTC_DCHECK_GE(h_highpass_[ch].capacity(), filters_time_domain[ch].size());
    h_highpass_[ch].resize(filters_time_domain[ch].size());
    // Minimum phase high-pass filter with cutoff frequency at about 600 Hz.
    constexpr std::array<float, 3> h = {
        {0.7929742f, -0.36072128f, -0.47047766f}};

    // Zero the region first; the convolution below accumulates into it.
    std::fill(h_highpass_[ch].begin() + region_.start_sample_,
              h_highpass_[ch].begin() + region_.end_sample_ + 1, 0.f);
    float* h_highpass_ch = h_highpass_[ch].data();
    const float* filters_time_domain_ch = filters_time_domain[ch].data();
    const size_t region_end = region_.end_sample_;
    // Direct-form convolution. The loop starts no earlier than h.size() - 1
    // so that the k - j index never underflows (the first taps are skipped).
    for (size_t k = std::max(h.size() - 1, region_.start_sample_);
         k <= region_end; ++k) {
      float tmp = h_highpass_ch[k];
      for (size_t j = 0; j < h.size(); ++j) {
        tmp += filters_time_domain_ch[k - j] * h[j];
      }
      h_highpass_ch[k] = tmp;
    }
  }
}
|
||||
|
||||
// Rewinds the incremental analysis so the next SetRegionToAnalyze call starts
// from the beginning of the filter.
void FilterAnalyzer::ResetRegion() {
  region_.start_sample_ = 0;
  region_.end_sample_ = 0;
}
|
||||
|
||||
void FilterAnalyzer::SetRegionToAnalyze(size_t filter_size) {
|
||||
constexpr size_t kNumberBlocksToUpdate = 1;
|
||||
auto& r = region_;
|
||||
r.start_sample_ = r.end_sample_ >= filter_size - 1 ? 0 : r.end_sample_ + 1;
|
||||
r.end_sample_ =
|
||||
std::min(r.start_sample_ + kNumberBlocksToUpdate * kBlockSize - 1,
|
||||
filter_size - 1);
|
||||
|
||||
// Check range.
|
||||
RTC_DCHECK_LT(r.start_sample_, filter_size);
|
||||
RTC_DCHECK_LT(r.end_sample_, filter_size);
|
||||
RTC_DCHECK_LE(r.start_sample_, r.end_sample_);
|
||||
}
|
||||
|
||||
// Precomputes the render-activity threshold as the configured active render
// limit squared, scaled by kFftLengthBy2 samples, so Detect can compare it
// directly against a per-block energy sum.
FilterAnalyzer::ConsistentFilterDetector::ConsistentFilterDetector(
    const EchoCanceller3Config& config)
    : active_render_threshold_(config.render_levels.active_render_limit *
                               config.render_levels.active_render_limit *
                               kFftLengthBy2) {
  Reset();
}
|
||||
|
||||
// Clears all detection state. The delay reference is set to -10, a value no
// real delay takes, which forces the first Detect call to restart the
// consistency count.
void FilterAnalyzer::ConsistentFilterDetector::Reset() {
  consistent_delay_reference_ = -10;
  consistent_estimate_counter_ = 0;
  significant_peak_ = false;
  filter_floor_low_limit_ = 0;
  filter_floor_high_limit_ = 0;
  filter_floor_accum_ = 0.f;
  filter_secondary_peak_ = 0.f;
}
|
||||
|
||||
// Detects whether the filter has a significant peak that stays at the same
// delay over time. The filter is scanned region-by-region across successive
// calls; statistics accumulate until the region reaches the filter end, at
// which point peak significance is judged. The estimate is reported
// consistent once the counter exceeds 1.5 seconds worth of blocks.
bool FilterAnalyzer::ConsistentFilterDetector::Detect(
    rtc::ArrayView<const float> filter_to_analyze,
    const FilterRegion& region,
    const Block& x_block,
    size_t peak_index,
    int delay_blocks) {
  // A region starting at sample 0 begins a new scan over the filter: reset
  // the accumulated statistics and define the exclusion zone around the peak
  // (64 taps before it, 128 after it, clamped at the filter edges).
  if (region.start_sample_ == 0) {
    filter_floor_accum_ = 0.f;
    filter_secondary_peak_ = 0.f;
    filter_floor_low_limit_ = peak_index < 64 ? 0 : peak_index - 64;
    filter_floor_high_limit_ =
        peak_index > filter_to_analyze.size() - 129 ? 0 : peak_index + 128;
  }

  // Accumulate the filter floor (sum of magnitudes) and the largest tap
  // outside the peak exclusion zone, over the part of the current region that
  // lies below the zone...
  float filter_floor_accum = filter_floor_accum_;
  float filter_secondary_peak = filter_secondary_peak_;
  for (size_t k = region.start_sample_;
       k < std::min(region.end_sample_ + 1, filter_floor_low_limit_); ++k) {
    float abs_h = fabsf(filter_to_analyze[k]);
    filter_floor_accum += abs_h;
    filter_secondary_peak = std::max(filter_secondary_peak, abs_h);
  }

  // ...and over the part that lies above the zone.
  for (size_t k = std::max(filter_floor_high_limit_, region.start_sample_);
       k <= region.end_sample_; ++k) {
    float abs_h = fabsf(filter_to_analyze[k]);
    filter_floor_accum += abs_h;
    filter_secondary_peak = std::max(filter_secondary_peak, abs_h);
  }
  filter_floor_accum_ = filter_floor_accum;
  filter_secondary_peak_ = filter_secondary_peak;

  // The scan has covered the whole filter: judge peak significance. The peak
  // must exceed 10x the average floor magnitude and 2x the secondary peak.
  if (region.end_sample_ == filter_to_analyze.size() - 1) {
    float filter_floor = filter_floor_accum_ /
                         (filter_floor_low_limit_ + filter_to_analyze.size() -
                          filter_floor_high_limit_);

    float abs_peak = fabsf(filter_to_analyze[peak_index]);
    significant_peak_ = abs_peak > 10.f * filter_floor &&
                        abs_peak > 2.f * filter_secondary_peak_;
  }

  if (significant_peak_) {
    // Check whether any render channel carries enough energy for this block
    // to count toward the consistency measure.
    bool active_render_block = false;
    for (int ch = 0; ch < x_block.NumChannels(); ++ch) {
      rtc::ArrayView<const float, kBlockSize> x_channel =
          x_block.View(/*band=*/0, ch);
      const float x_energy = std::inner_product(
          x_channel.begin(), x_channel.end(), x_channel.begin(), 0.f);
      if (x_energy > active_render_threshold_) {
        active_render_block = true;
        break;
      }
    }

    // Count consecutive active-render blocks with an unchanged delay; any
    // delay change restarts the count from the new reference.
    if (consistent_delay_reference_ == delay_blocks) {
      if (active_render_block) {
        ++consistent_estimate_counter_;
      }
    } else {
      consistent_estimate_counter_ = 0;
      consistent_delay_reference_ = delay_blocks;
    }
  }
  return consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond;
}
|
||||
|
||||
} // namespace webrtc
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user