FaceAccess/Record/EchoRecord.cpp
2024-09-06 16:35:51 +08:00

138 lines
5.7 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "BoostLog.h"
#include "DateTime.h"
#include "SpeexDsp.h"
#include "Utility.h"
#include "WebRtcAecm.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_canceller3_factory.h"
#include "main.h"
#include "modules/audio_processing/aec3/echo_canceller3.h"
#include <memory>
class EchoRecordPrivate {
public:
void initialize(int sampleRate, int channels, int period) {
std::unique_ptr<webrtc::EchoCanceller3Factory> factory = std::make_unique<webrtc::EchoCanceller3Factory>();
echoCanceller = factory->Create(sampleRate, channels, channels);
nearendBuffer = std::make_unique<webrtc::AudioBuffer>(sampleRate, channels, sampleRate, channels, sampleRate, channels);
farendBuffer = std::make_unique<webrtc::AudioBuffer>(sampleRate, channels, sampleRate, channels, sampleRate, channels);
linearOutputBuffer = std::make_unique<webrtc::AudioBuffer>(sampleRate, channels, sampleRate, channels, sampleRate, channels);
}
std::unique_ptr<webrtc::EchoControl> echoCanceller;
std::unique_ptr<webrtc::AudioBuffer> nearendBuffer;
std::unique_ptr<webrtc::AudioBuffer> farendBuffer;
std::unique_ptr<webrtc::AudioBuffer> linearOutputBuffer;
};
// Construct the task and allocate its private AEC3 state (pimpl idiom);
// the destructor releases it.
EchoRecordTask::EchoRecordTask() : m_d(new EchoRecordPrivate) {
}
// Release the private implementation.
// `delete` on a null pointer is a no-op, so the previous null check was
// redundant; the pointer is also reset afterwards so it never dangles.
EchoRecordTask::~EchoRecordTask() {
    delete m_d;
    m_d = nullptr;
}
// Select which echo-cancellation back-end run() will use.
void EchoRecordTask::setDsp(Dsp dsp) {
    if (dsp == m_dsp) {
        return; // unchanged — nothing to do
    }
    m_dsp = dsp;
}
// Enable or disable dumping of the raw mic input and the processed output
// as PCM files under DumpPath. File names carry a timestamp (and, for the
// output file, the active DSP name) so sessions do not overwrite each other.
// Fix: `setDumpEnabled(false)` previously did nothing at all — dumping could
// never be switched off. It now releases both streams, closing the files.
void EchoRecordTask::setDumpEnabled(bool enabled) {
    if (!enabled) {
        // Dropping the last shared_ptr reference closes each ofstream.
        m_micOfs.reset();
        m_outOfs.reset();
        return;
    }
    auto date = DateTime::currentDateTime().toString("%Y%m%d%H%M%S");
    std::ostringstream oss;
    oss << DumpPath << "/mic_" << date << ".pcm";
    m_micOfs = std::make_shared<std::ofstream>(oss.str(), std::ofstream::binary);
    oss.str(""); // reuse the stream for the second path
    oss << DumpPath << "/speak_" << dspToString(m_dsp) << "_" << date << ".pcm";
    m_outOfs = std::make_shared<std::ofstream>(oss.str(), std::ofstream::binary);
}
// Set the capture channel count used by run(); no-op when unchanged.
void EchoRecordTask::setChannels(int channels) {
    if (channels == m_channels) {
        return;
    }
    m_channels = channels;
}
// "underrun occurred": PCM playback starvation (output buffer ran dry)
// Loopback (far-end) signal leads the mic signal; time offset < 80 ms
// ./Record --echo --vqe=false --channels=2
// ./Record --echo --vqe=true --channels=2
// ./Record --echo --vqe=false --channels=1
// Capture/playback loop: configures 16 kHz audio, routes every captured
// frame through the selected echo-cancellation back-end (Speex, WebRTC
// AECM, or WebRTC AEC3), optionally dumps raw/processed PCM, and plays the
// processed signal back. Whatever was just played is copied into
// m_farendBuffer so the next frame can use it as the far-end (echo)
// reference.
void EchoRecordTask::run() {
LOG(info) << "dsp use: " << dspToString(m_dsp);
RkAudio::Format format;
format.sampleRate = 16000;
format.channels = m_channels;
format.period = 10; // frame period, presumably in ms — TODO confirm against RkAudio
m_speex = std::make_shared<SpeexDsp>();
m_speex->start(format.sampleRate, m_channels, format.period);
// Buffer size in bytes: channels * 2 bytes/sample * samples-per-ms * period.
m_farendBuffer.resize(m_channels * sizeof(int16_t) * format.sampleRate / 1000 * format.period);
m_nearendBuffer.resize(m_channels * sizeof(int16_t) * format.sampleRate / 1000 * format.period);
m_webRtcAecm = std::make_shared<WebRtcAecm>();
m_webRtcAecm->start(format.sampleRate, format.channels, format.period);
m_d->initialize(format.sampleRate, m_channels, format.period);
m_output = std::make_shared<RkAudio::Output>();
// NOTE(review): output is always opened with 2 channels regardless of
// m_channels; hardware VQE is requested only for the Vqe back-end.
if (!m_output->open(sizeof(uint16_t), format.sampleRate, 2, format.period, m_dsp == Vqe)) {
LOG(error) << "audio output open failed.";
return;
}
m_outBuffer.resize(m_channels * sizeof(int16_t) * format.sampleRate / 1000 * format.period);
m_input = std::make_shared<RkAudio::Input>();
m_input->setDataCallback([this, format](const RkAudio::Frame &frame) {
// Optional dump of the raw microphone signal.
if (m_micOfs) {
m_micOfs->write(reinterpret_cast<const char *>(frame.data), frame.byteSize);
}
memcpy(m_nearendBuffer.data(), frame.data, frame.byteSize);
if (m_dsp == Speex) {
// Feed the last played frame as the echo reference, then cancel.
m_speex->echoPlayback(reinterpret_cast<const int16_t *>(m_farendBuffer.data()));
m_speex->echoCapture(reinterpret_cast<const int16_t *>(frame.data), reinterpret_cast<int16_t *>(m_outBuffer.data()));
} else if (m_dsp == AecMobile) {
// AECM consumes 16-bit samples, hence byte-size / 2 for the count.
m_webRtcAecm->echoPlayback(reinterpret_cast<const int16_t *>(m_farendBuffer.data()), m_farendBuffer.size() / 2);
m_webRtcAecm->echoCancellation(reinterpret_cast<int16_t *>(frame.data), reinterpret_cast<int16_t *>(m_nearendBuffer.data()),
reinterpret_cast<int16_t *>(m_outBuffer.data()), frame.frameSize);
} else if (m_dsp == Aec3) {
webrtc::StreamConfig config(format.sampleRate, format.channels); // mono stream config
m_d->nearendBuffer->CopyFrom(reinterpret_cast<const int16_t *>(frame.data), config);
m_d->farendBuffer->CopyFrom(reinterpret_cast<const int16_t *>(m_farendBuffer.data()), config);
// AEC3 sequence: analyze render (far end), then analyze and process capture.
m_d->echoCanceller->AnalyzeRender(m_d->farendBuffer.get());
m_d->echoCanceller->AnalyzeCapture(m_d->nearendBuffer.get());
m_d->echoCanceller->ProcessCapture(m_d->nearendBuffer.get(), false);
// m_d->echoCanceller->ProcessCapture(&nearendBuffer, &linearOutputBuffer, /*level_change=*/false);
m_d->nearendBuffer->CopyTo(config, reinterpret_cast<int16_t *>(m_outBuffer.data()));
}
// Optional dump of the processed (echo-cancelled) signal.
if (m_outOfs) {
m_outOfs->write(reinterpret_cast<const char *>(m_outBuffer.data()), m_outBuffer.size());
}
if (m_channels == 2) {
// Stereo: the raw captured frame is played back unmodified.
// NOTE(review): this bypasses m_outBuffer — confirm that is intentional.
m_output->write(frame.data, frame.byteSize);
} else if (m_channels == 1) {
// Mono: duplicate the processed samples to fill both output channels.
auto filledData = duplicate(m_outBuffer.data(), m_outBuffer.size());
m_output->write(filledData.data(), filledData.size());
}
// Keep what was just produced as the far-end reference for the next frame.
memcpy(m_farendBuffer.data(), m_outBuffer.data(), m_outBuffer.size());
// m_output->write(reinterpret_cast<const uint8_t *>(m_buffer.data()), m_buffer.size());
});
m_input->open(format, m_dsp == Vqe);
}