ZLMediaKit/ext-codec/AAC.cpp

470 lines
16 KiB
C++
Raw Permalink Normal View History

2018-10-30 14:59:42 +08:00
/*
2023-12-09 16:23:51 +08:00
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
2018-10-30 14:59:42 +08:00
*
2023-12-09 16:23:51 +08:00
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
2018-10-30 14:59:42 +08:00
*
2023-12-09 16:23:51 +08:00
* Use of this source code is governed by MIT-like license that can be found in the
2020-04-04 20:30:09 +08:00
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
2018-10-30 14:59:42 +08:00
*/
#include "AAC.h"
2023-12-10 10:21:40 +08:00
#include "AACRtp.h"
#include "AACRtmp.h"
#include "Common/Parser.h"
#include "Extension/Factory.h"
2020-06-11 19:21:46 +08:00
#ifdef ENABLE_MP4
#include "mpeg4-aac.h"
#endif
2018-10-30 14:59:42 +08:00
using namespace std;
using namespace toolkit;
2018-10-30 14:59:42 +08:00
namespace mediakit{
2020-05-11 23:34:57 +08:00
unsigned const samplingFrequencyTable[16] = { 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350, 0, 0, 0 };
2018-10-30 14:59:42 +08:00
2022-06-01 11:35:52 +08:00
#ifndef ENABLE_MP4
2020-05-11 23:34:57 +08:00
class AdtsHeader {
2020-05-11 23:25:12 +08:00
public:
unsigned int syncword = 0; // 12 bslbf 同步字The bit string 1111 1111 1111说明一个ADTS帧的开始
unsigned int id; // 1 bslbf MPEG 标示符, 设置为1
unsigned int layer; // 2 uimsbf Indicates which layer is used. Set to 00
unsigned int protection_absent; // 1 bslbf 表示是否误码校验
unsigned int profile; // 2 uimsbf 表示使用哪个级别的AAC如01 Low Complexity(LC)--- AACLC
unsigned int sf_index; // 4 uimsbf 表示使用的采样率下标
unsigned int private_bit; // 1 bslbf
unsigned int channel_configuration; // 3 uimsbf 表示声道数
unsigned int original; // 1 bslbf
unsigned int home; // 1 bslbf
// 下面的为改变的参数即每一帧都不同 [AUTO-TRANSLATED:481aa349]
// The following are the parameters that change in each frame
unsigned int copyright_identification_bit; // 1 bslbf
unsigned int copyright_identification_start; // 1 bslbf
2020-05-11 23:25:12 +08:00
unsigned int aac_frame_length; // 13 bslbf 一个ADTS帧的长度包括ADTS头和raw data block
unsigned int adts_buffer_fullness; // 11 bslbf 0x7FF 说明是码率可变的码流
// no_raw_data_blocks_in_frame 表示ADTS帧中有number_of_raw_data_blocks_in_frame + 1个AAC原始帧. [AUTO-TRANSLATED:3e975531]
// no_raw_data_blocks_in_frame indicates that there are number_of_raw_data_blocks_in_frame + 1 AAC raw frames in the ADTS frame.
// 所以说number_of_raw_data_blocks_in_frame == 0 [AUTO-TRANSLATED:1b8e9697]
// So number_of_raw_data_blocks_in_frame == 0
// 表示说ADTS帧中有一个AAC数据块并不是说没有。(一个AAC原始帧包含一段时间内1024个采样及相关数据) [AUTO-TRANSLATED:4a09d783]
// means that there is one AAC data block in the ADTS frame, not that there is none. (An AAC raw frame contains 1024 samples and related data over a period of time)
unsigned int no_raw_data_blocks_in_frame; // 2 uimsfb
2020-05-11 23:25:12 +08:00
};
static void dumpAdtsHeader(const AdtsHeader &hed, uint8_t *out) {
out[0] = (hed.syncword >> 4 & 0xFF); // 8bit
out[1] = (hed.syncword << 4 & 0xF0); // 4 bit
out[1] |= (hed.id << 3 & 0x08); // 1 bit
out[1] |= (hed.layer << 1 & 0x06); // 2bit
out[1] |= (hed.protection_absent & 0x01); // 1 bit
2020-05-11 23:25:12 +08:00
out[2] = (hed.profile << 6 & 0xC0); // 2 bit
out[2] |= (hed.sf_index << 2 & 0x3C); // 4bit
out[2] |= (hed.private_bit << 1 & 0x02); // 1 bit
out[2] |= (hed.channel_configuration >> 2 & 0x03); // 1 bit
out[3] = (hed.channel_configuration << 6 & 0xC0); // 2 bit
out[3] |= (hed.original << 5 & 0x20); // 1 bit
out[3] |= (hed.home << 4 & 0x10); // 1 bit
out[3] |= (hed.copyright_identification_bit << 3 & 0x08); // 1 bit
out[3] |= (hed.copyright_identification_start << 2 & 0x04); // 1 bit
out[3] |= (hed.aac_frame_length >> 11 & 0x03); // 2 bit
out[4] = (hed.aac_frame_length >> 3 & 0xFF); // 8 bit
out[5] = (hed.aac_frame_length << 5 & 0xE0); // 3 bit
out[5] |= (hed.adts_buffer_fullness >> 6 & 0x1F); // 5 bit
out[6] = (hed.adts_buffer_fullness << 2 & 0xFC); // 6 bit
out[6] |= (hed.no_raw_data_blocks_in_frame & 0x03); // 2 bit
2018-10-30 14:59:42 +08:00
}
static bool parseAacConfig(const string &config, AdtsHeader &adts) {
if (config.size() < 2) {
return false;
}
2020-05-11 23:25:12 +08:00
uint8_t cfg1 = config[0];
uint8_t cfg2 = config[1];
2018-10-30 14:59:42 +08:00
int audioObjectType;
int sampling_frequency_index;
int channel_configuration;
audioObjectType = cfg1 >> 3;
sampling_frequency_index = ((cfg1 & 0x07) << 1) | (cfg2 >> 7);
channel_configuration = (cfg2 & 0x7F) >> 3;
adts.syncword = 0x0FFF;
adts.id = 0;
adts.layer = 0;
adts.protection_absent = 1;
adts.profile = audioObjectType - 1;
adts.sf_index = sampling_frequency_index;
adts.private_bit = 0;
adts.channel_configuration = channel_configuration;
adts.original = 0;
adts.home = 0;
adts.copyright_identification_bit = 0;
adts.copyright_identification_start = 0;
adts.aac_frame_length = 7;
adts.adts_buffer_fullness = 2047;
adts.no_raw_data_blocks_in_frame = 0;
return true;
2018-10-30 14:59:42 +08:00
}
#endif// ENABLE_MP4
2020-05-11 23:25:12 +08:00
int getAacFrameLength(const uint8_t *data, size_t bytes) {
2020-08-08 12:19:04 +08:00
uint16_t len;
if (bytes < 7) return -1;
if (0xFF != data[0] || 0xF0 != (data[1] & 0xF0)) {
return -1;
}
len = ((uint16_t) (data[3] & 0x03) << 11) | ((uint16_t) data[4] << 3) | ((uint16_t) (data[5] >> 5) & 0x07);
return len;
}
string makeAacConfig(const uint8_t *hex, size_t length){
2020-06-11 19:21:46 +08:00
#ifndef ENABLE_MP4
2020-05-11 23:25:12 +08:00
if (!(hex[0] == 0xFF && (hex[1] & 0xF0) == 0xF0)) {
return "";
}
// Get and check the 'profile':
unsigned char profile = (hex[2] & 0xC0) >> 6; // 2 bits
if (profile == 3) {
return "";
}
// Get and check the 'sampling_frequency_index':
unsigned char sampling_frequency_index = (hex[2] & 0x3C) >> 2; // 4 bits
if (samplingFrequencyTable[sampling_frequency_index] == 0) {
return "";
}
// Get and check the 'channel_configuration':
unsigned char channel_configuration = ((hex[2] & 0x01) << 2) | ((hex[3] & 0xC0) >> 6); // 3 bits
unsigned char audioSpecificConfig[2];
unsigned char const audioObjectType = profile + 1;
audioSpecificConfig[0] = (audioObjectType << 3) | (sampling_frequency_index >> 1);
audioSpecificConfig[1] = (sampling_frequency_index << 7) | (channel_configuration << 3);
return string((char *)audioSpecificConfig,2);
2020-06-11 19:21:46 +08:00
#else
struct mpeg4_aac_t aac;
memset(&aac, 0, sizeof(aac));
2020-06-11 19:21:46 +08:00
if (mpeg4_aac_adts_load(hex, length, &aac) > 0) {
char buf[32] = {0};
int len = mpeg4_aac_audio_specific_config_save(&aac, (uint8_t *) buf, sizeof(buf));
if (len > 0) {
return string(buf, len);
}
}
WarnL << "生成aac config失败, adts header:" << hexdump(hex, length);
return "";
#endif
2020-05-11 23:25:12 +08:00
}
int dumpAacConfig(const string &config, size_t length, uint8_t *out, size_t out_size) {
2020-06-11 19:21:46 +08:00
#ifndef ENABLE_MP4
2020-05-11 23:25:12 +08:00
AdtsHeader header;
parseAacConfig(config, header);
header.aac_frame_length = (decltype(header.aac_frame_length))(ADTS_HEADER_LEN + length);
2020-05-11 23:25:12 +08:00
dumpAdtsHeader(header, out);
2020-06-11 19:21:46 +08:00
return ADTS_HEADER_LEN;
#else
struct mpeg4_aac_t aac;
memset(&aac, 0, sizeof(aac));
2020-06-11 19:21:46 +08:00
int ret = mpeg4_aac_audio_specific_config_load((uint8_t *) config.data(), config.size(), &aac);
if (ret > 0) {
ret = mpeg4_aac_adts_save(&aac, length, out, out_size);
}
if (ret < 0) {
WarnL << "生成adts头失败:" << ret << ", aac config:" << hexdump(config.data(), config.size());
}
assert((int)out_size >= ret);
2020-06-11 19:21:46 +08:00
return ret;
#endif
2020-05-11 23:25:12 +08:00
}
bool parseAacConfig(const string &config, int &samplerate, int &channels) {
2020-06-11 19:21:46 +08:00
#ifndef ENABLE_MP4
2020-05-11 23:25:12 +08:00
AdtsHeader header;
if (!parseAacConfig(config, header)) {
return false;
}
2020-05-11 23:25:12 +08:00
samplerate = samplingFrequencyTable[header.sf_index];
channels = header.channel_configuration;
2020-06-11 19:21:46 +08:00
return true;
#else
struct mpeg4_aac_t aac;
memset(&aac, 0, sizeof(aac));
2020-06-11 19:21:46 +08:00
int ret = mpeg4_aac_audio_specific_config_load((uint8_t *) config.data(), config.size(), &aac);
if (ret > 0) {
samplerate = aac.sampling_frequency;
channels = aac.channels;
return true;
}
WarnL << "获取aac采样率、声道数失败:" << hexdump(config.data(), config.size());
return false;
#endif
2018-10-30 14:59:42 +08:00
}
2021-02-05 11:51:16 +08:00
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* aac类型SDP
* aac type SDP
* [AUTO-TRANSLATED:c06f00b1]
2021-02-05 11:51:16 +08:00
*/
class AACSdp : public Sdp {
public:
/**
*
* @param aac_cfg aac两个字节的配置描述
2023-12-09 16:23:51 +08:00
* @param payload_type rtp payload type
2021-02-05 11:51:16 +08:00
* @param sample_rate
2023-12-09 16:23:51 +08:00
* @param channels
2021-02-05 11:51:16 +08:00
* @param bitrate
* Constructor
* @param aac_cfg aac two-byte configuration description
* @param payload_type rtp payload type
* @param sample_rate audio sampling rate
* @param channels number of channels
* @param bitrate bitrate
* [AUTO-TRANSLATED:6fe1f3b2]
2021-02-05 11:51:16 +08:00
*/
2023-12-09 16:23:51 +08:00
AACSdp(const string &aac_cfg, int payload_type, int sample_rate, int channels, int bitrate)
: Sdp(sample_rate, payload_type) {
2021-02-05 11:51:16 +08:00
_printer << "m=audio 0 RTP/AVP " << payload_type << "\r\n";
if (bitrate) {
_printer << "b=AS:" << bitrate << "\r\n";
}
2023-12-09 16:23:51 +08:00
_printer << "a=rtpmap:" << payload_type << " " << getCodecName(CodecAAC) << "/" << sample_rate << "/" << channels << "\r\n";
2021-02-05 11:51:16 +08:00
string configStr;
2023-12-09 16:23:51 +08:00
char buf[4] = { 0 };
for (auto &ch : aac_cfg) {
2021-02-05 11:51:16 +08:00
snprintf(buf, sizeof(buf), "%02X", (uint8_t)ch);
configStr.append(buf);
}
_printer << "a=fmtp:" << payload_type << " streamtype=5;profile-level-id=1;mode=AAC-hbr;"
<< "sizelength=13;indexlength=3;indexdeltalength=3;config=" << configStr << "\r\n";
}
2023-12-09 16:23:51 +08:00
string getSdp() const override { return _printer; }
2021-02-05 11:51:16 +08:00
private:
_StrPrinter _printer;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
2022-06-01 11:35:52 +08:00
AACTrack::AACTrack(int samplerate, int channel, int profile) : _channel(channel), _sampleRate(samplerate) {
uint8_t audioSpecificConfig[2] = { 0 };
uint8_t audioObjectType = profile+1;
int samplingFrequencyIndex = 0;
for (size_t i = 0; i < sizeof(samplingFrequencyTable) / sizeof(samplingFrequencyTable[0]); i++) {
if (samplingFrequencyTable[i] == (unsigned)samplerate) {
samplingFrequencyIndex = i;
break;
}
}
_cfg.resize(2);
_cfg[0] = (audioObjectType << 3) | (samplingFrequencyIndex >> 1);
_cfg[1] = (samplingFrequencyIndex << 7) | (channel << 3);
// update();
2022-06-01 11:35:52 +08:00
}
2021-02-05 11:51:16 +08:00
AACTrack::AACTrack(const string &aac_cfg) {
if (aac_cfg.size() < 2) {
throw std::invalid_argument("adts配置必须最少2个字节");
}
_cfg = aac_cfg;
2023-12-09 16:23:51 +08:00
update();
2021-02-05 11:51:16 +08:00
}
CodecId AACTrack::getCodecId() const {
return CodecAAC;
}
2023-12-09 16:23:51 +08:00
bool AACTrack::ready() const {
2021-02-05 11:51:16 +08:00
return !_cfg.empty();
}
int AACTrack::getAudioSampleRate() const {
return _sampleRate;
}
int AACTrack::getAudioSampleBit() const {
return _sampleBit;
}
int AACTrack::getAudioChannel() const {
return _channel;
}
2023-12-09 16:23:51 +08:00
static Frame::Ptr addADTSHeader(const Frame::Ptr &frame_in, const std::string &aac_config) {
auto frame = FrameImp::create();
frame->_codec_id = CodecAAC;
// 生成adts头 [AUTO-TRANSLATED:c285b9b0]
// Generate adts header
2023-12-09 16:23:51 +08:00
char adts_header[32] = { 0 };
auto size = dumpAacConfig(aac_config, frame_in->size(), (uint8_t *)adts_header, sizeof(adts_header));
CHECK(size > 0, "Invalid adts config");
frame->_prefix_size = size;
frame->_dts = frame_in->dts();
frame->_buffer.assign(adts_header, size);
frame->_buffer.append(frame_in->data(), frame_in->size());
2023-12-09 22:34:22 +08:00
frame->setIndex(frame_in->getIndex());
2023-12-09 16:23:51 +08:00
return frame;
}
bool AACTrack::inputFrame(const Frame::Ptr &frame) {
if (!frame->prefixSize()) {
2023-12-09 16:23:51 +08:00
CHECK(ready());
return inputFrame_l(addADTSHeader(frame, _cfg));
}
bool ret = false;
// 有adts头尝试分帧 [AUTO-TRANSLATED:f691c4ce]
// There is an adts header, try to frame
int64_t dts = frame->dts();
int64_t pts = frame->pts();
auto ptr = frame->data();
auto end = frame->data() + frame->size();
while (ptr < end) {
2023-12-09 16:23:51 +08:00
auto frame_len = getAacFrameLength((uint8_t *)ptr, end - ptr);
if (frame_len < ADTS_HEADER_LEN) {
break;
}
if (frame_len == (int)frame->size()) {
return inputFrame_l(frame);
}
2023-12-09 16:23:51 +08:00
auto sub_frame = std::make_shared<FrameInternalBase<FrameFromPtr>>(frame, (char *)ptr, frame_len, dts, pts, ADTS_HEADER_LEN);
ptr += frame_len;
if (ptr > end) {
WarnL << "invalid aac length in adts header: " << frame_len
<< ", remain data size: " << end - (ptr - frame_len);
break;
}
if (inputFrame_l(sub_frame)) {
ret = true;
2021-02-05 11:51:16 +08:00
}
2023-12-09 16:23:51 +08:00
dts += 1024 * 1000 / getAudioSampleRate();
pts += 1024 * 1000 / getAudioSampleRate();
2021-02-05 11:51:16 +08:00
}
return ret;
2021-02-05 11:51:16 +08:00
}
bool AACTrack::inputFrame_l(const Frame::Ptr &frame) {
2023-12-09 16:23:51 +08:00
if (_cfg.empty() && frame->prefixSize()) {
// 未获取到aac_cfg信息根据7个字节的adts头生成aac config [AUTO-TRANSLATED:1b80f562]
// Unable to get aac_cfg information, generate aac config based on the 7-byte adts header
2023-12-09 16:23:51 +08:00
_cfg = makeAacConfig((uint8_t *)(frame->data()), frame->prefixSize());
update();
2021-02-05 11:51:16 +08:00
}
if (frame->size() > frame->prefixSize()) {
// 除adts头外有实际负载 [AUTO-TRANSLATED:5b7c088e]
// There is an actual payload besides the adts header
return AudioTrack::inputFrame(frame);
2021-02-05 11:51:16 +08:00
}
return false;
2021-02-05 11:51:16 +08:00
}
2023-12-09 16:23:51 +08:00
toolkit::Buffer::Ptr AACTrack::getExtraData() const {
CHECK(ready());
return std::make_shared<BufferString>(_cfg);
}
2023-12-09 16:23:51 +08:00
void AACTrack::setExtraData(const uint8_t *data, size_t size) {
CHECK(size >= 2);
_cfg.assign((char *)data, size);
update();
}
bool AACTrack::update() {
return parseAacConfig(_cfg, _sampleRate, _channel);
2021-02-05 11:51:16 +08:00
}
2023-12-09 16:23:51 +08:00
Track::Ptr AACTrack::clone() const {
return std::make_shared<AACTrack>(*this);
2021-02-05 11:51:16 +08:00
}
2023-12-09 16:23:51 +08:00
Sdp::Ptr AACTrack::getSdp(uint8_t payload_type) const {
if (!ready()) {
2020-04-18 18:46:20 +08:00
WarnL << getCodecName() << " Track未准备好";
2019-06-28 16:12:39 +08:00
return nullptr;
}
2023-12-09 16:23:51 +08:00
return std::make_shared<AACSdp>(getExtraData()->toString(), payload_type, getAudioSampleRate(), getAudioChannel(), getBitRate() / 1024);
2019-06-28 16:12:39 +08:00
}
2023-12-10 10:21:40 +08:00
namespace {
CodecId getCodec() {
return CodecAAC;
}
Track::Ptr getTrackByCodecId(int sample_rate, int channels, int sample_bit) {
if (sample_rate && channels)
return std::make_shared<AACTrack>(sample_rate, channels, 1);
2023-12-10 10:21:40 +08:00
return std::make_shared<AACTrack>();
}
Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
string aac_cfg_str = findSubString(track->_fmtp.data(), "config=", ";");
if (aac_cfg_str.empty()) {
aac_cfg_str = findSubString(track->_fmtp.data(), "config=", nullptr);
}
if (aac_cfg_str.empty()) {
// 如果sdp中获取不到aac config信息那么在rtp也无法获取那么忽略该Track [AUTO-TRANSLATED:995bc20d]
// If aac config information cannot be obtained from sdp, then it cannot be obtained from rtp either, so ignore this Track
2023-12-10 10:21:40 +08:00
return nullptr;
}
string aac_cfg;
for (size_t i = 0; i < aac_cfg_str.size() / 2; ++i) {
unsigned int cfg;
sscanf(aac_cfg_str.substr(i * 2, 2).data(), "%02X", &cfg);
cfg &= 0x00FF;
aac_cfg.push_back((char)cfg);
}
return std::make_shared<AACTrack>(aac_cfg);
}
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {
return std::make_shared<AACRtpEncoder>();
}
RtpCodec::Ptr getRtpDecoderByCodecId() {
return std::make_shared<AACRtpDecoder>();
}
RtmpCodec::Ptr getRtmpEncoderByTrack(const Track::Ptr &track) {
return std::make_shared<AACRtmpEncoder>(track);
}
RtmpCodec::Ptr getRtmpDecoderByTrack(const Track::Ptr &track) {
return std::make_shared<AACRtmpDecoder>(track);
}
size_t aacPrefixSize(const char *data, size_t bytes) {
uint8_t *ptr = (uint8_t *)data;
size_t prefix = 0;
if (!(bytes > ADTS_HEADER_LEN && ptr[0] == 0xFF && (ptr[1] & 0xF0) == 0xF0)) {
return 0;
}
return ADTS_HEADER_LEN;
}
Frame::Ptr getFrameFromPtr(const char *data, size_t bytes, uint64_t dts, uint64_t pts) {
return std::make_shared<FrameFromPtr>(CodecAAC, (char *)data, bytes, dts, pts, aacPrefixSize(data, bytes));
}
} // namespace
CodecPlugin aac_plugin = { getCodec,
getTrackByCodecId,
getTrackBySdp,
getRtpEncoderByCodecId,
getRtpDecoderByCodecId,
getRtmpEncoderByTrack,
getRtmpDecoderByTrack,
getFrameFromPtr };
} // namespace mediakit