增加FFmpegEncoder和test_audio_transcode, 主要修改有:

- 使用FFmpegAudioFifo来简化代码
- 修复转码时间戳不规则递增，导致的音频破音和不连续问题
- 使用double来保存_tsp，避免出现2.5ms的frame_size(ffmpeg内置的opus)
2024-11-22 19:00:01 +08:00 · 2022-05-31 17:10:02 +08:00 · 2022-05-31 17:10:02 +08:00 · 5c92b4e48f
commit 5c92b4e48f
parent b33c227cf4
4 changed files with 611 additions and 20 deletions
--- a/src/Codec/Transcode.cpp
+++ b/src/Codec/Transcode.cpp
@ -13,10 +13,17 @@
 #include <dlfcn.h>
 #endif
 #include "Util/File.h"
+#include "Util/util.h"
 #include "Util/uv_errno.h"
+#include <float.h>
 #include "Transcode.h"
 #include "Extension/AAC.h"
 #include "Common/config.h"
+#include "Extension/Opus.h"
+#include "Extension/G711.h"
+#include "Extension/H264.h"
+#include "Extension/H265.h"
+
 #define MAX_DELAY_SECOND 3

 using namespace std;
@ -66,6 +73,9 @@ static bool setupFFmpeg_l() {
 #if (LIBAVCODEC_VERSION_MAJOR < 58)
    avcodec_register_all();
 #endif
+    // Log the linked FFmpeg library versions as major.minor.
+    InfoL << "libavcodec " << LIBAVCODEC_VERSION_MAJOR << "." << LIBAVCODEC_VERSION_MINOR;
+    //InfoL << "libavformat " << LIBAVFORMAT_VERSION_MAJOR << "." << LIBAVFORMAT_VERSION_MINOR;
+    InfoL << "libavutil " << LIBAVUTIL_VERSION_MAJOR << "." << LIBAVUTIL_VERSION_MINOR;
    return true;
 }

@ -411,21 +421,23 @@ FFmpegDecoder::FFmpegDecoder(const Track::Ptr &track, int thread_num, const std:
        _context->flags |= AV_CODEC_FLAG_LOW_DELAY;
        _context->flags2 |= AV_CODEC_FLAG2_FAST;
        if (track->getTrackType() == TrackVideo) {
-            _context->width = static_pointer_cast<VideoTrack>(track)->getVideoWidth();
-            _context->height = static_pointer_cast<VideoTrack>(track)->getVideoHeight();
-        }
-
+            auto video = static_pointer_cast<VideoTrack>(track);
+            _context->width = video->getVideoWidth();
+            _context->height = video->getVideoHeight();
+            InfoL << "decode video " << video->getCodecName() << " " << _context->width << "x" << _context->height;
+        } else {
+            auto audio = static_pointer_cast<AudioTrack>(track);
+            InfoL << "decode audio " << audio->getCodecName() << " " << audio->getAudioSampleRate() << "x" << audio->getAudioChannel();
            switch (track->getCodecId()) {
                case CodecG711A:
                case CodecG711U: {
-                AudioTrack::Ptr audio = static_pointer_cast<AudioTrack>(track);
                    _context->channels = audio->getAudioChannel();
                    _context->sample_rate = audio->getAudioSampleRate();
                    _context->channel_layout = av_get_default_channel_layout(_context->channels);
                    break;
                }
-            default:
-                break;
+                default: break;
+            }
        }
        AVDictionary *dict = nullptr;
        if (thread_num <= 0) {
@ -491,10 +503,6 @@ void FFmpegDecoder::flush() {
    }
 }

-const AVCodecContext *FFmpegDecoder::getContext() const {
-    return _context.get();
-}
-
 bool FFmpegDecoder::inputFrame_l(const Frame::Ptr &frame, bool live, bool enable_merge) {
    if (_do_merger && enable_merge) {
        return _merger.inputFrame(frame, [this, live](uint64_t dts, uint64_t pts, const Buffer::Ptr &buffer, bool have_idr) {
@ -575,6 +583,82 @@ void FFmpegDecoder::onDecode(const FFmpegFrame::Ptr &frame) {

 ////////////////////////////////////////////////////////////////////////////////////////////////////////////

+// Releases the underlying AVAudioFifo when one has been allocated.
+FFmpegAudioFifo::~FFmpegAudioFifo() {
+    if (!_fifo) {
+        return;
+    }
+    av_audio_fifo_free(_fifo);
+    _fifo = nullptr;
+}
+
+// Number of samples currently buffered; 0 while no FIFO has been allocated.
+int FFmpegAudioFifo::size() const {
+    if (!_fifo) {
+        return 0;
+    }
+    return av_audio_fifo_size(_fifo);
+}
+
+/**
+ * Append the samples of `frame` to the FIFO and keep track of the timestamp
+ * of the oldest buffered sample.
+ * The FIFO is allocated lazily from the first frame's format and channel count.
+ * @param frame audio frame to queue; its pts is assumed to be in milliseconds
+ * @return true when the samples were queued, false on allocation or write failure
+ */
+bool FFmpegAudioFifo::Write(const AVFrame *frame) {
+    _format = (AVSampleFormat)frame->format;
+    if (!_fifo) {
+        _fifo = av_audio_fifo_alloc(_format, frame->channels, frame->nb_samples);
+        if (!_fifo) {
+            WarnL << "av_audio_fifo_alloc " << frame->channels << "x" << frame->nb_samples << " error";
+            return false;
+        }
+    }
+
+    _channels = frame->channels;
+    if (_samplerate != frame->sample_rate) {
+        _samplerate = frame->sample_rate;
+        // Duration of one sample, assuming incoming pts values are in milliseconds.
+        _timebase = 1000.0 / _samplerate;
+    }
+    if (frame->pts != AV_NOPTS_VALUE) {
+        // Timestamp of the first sample currently held in the FIFO.
+        double tsp = frame->pts - _timebase * av_audio_fifo_size(_fifo);
+        // flv.js and WebRTC are sensitive to audio timestamp increments (WebRTC more so):
+        // timestamps should advance by exactly the number of samples consumed, otherwise
+        // playback may crackle. Only re-base when unset or when drift exceeds 200 ms.
+        if (fabs(_tsp) < DBL_EPSILON || fabs(tsp - _tsp) > 200) {
+            InfoL << "reset base_tsp " << (int64_t)_tsp << "->" << (int64_t)tsp;
+            _tsp = tsp;
+        }
+    } else {
+        _tsp = 0;
+    }
+
+    // av_audio_fifo_write returns a negative AVERROR code when it cannot store the samples.
+    int written = av_audio_fifo_write(_fifo, (void **)frame->data, frame->nb_samples);
+    if (written < 0) {
+        WarnL << "av_audio_fifo_write error " << ffmpeg_err(written);
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Pop exactly `sample_size` samples from the FIFO into `frame`.
+ * The frame's format, layout, sample rate and pts are filled in and its buffers
+ * are allocated with av_frame_get_buffer(). Write() must have been called first.
+ * @param frame       destination frame (expected to hold no buffers yet)
+ * @param sample_size number of samples to read
+ * @return true when a frame was produced; false when fewer than `sample_size`
+ *         samples are buffered or when allocation/reading fails
+ */
+bool FFmpegAudioFifo::Read(AVFrame *frame, int sample_size) {
+    assert(_fifo);
+    int fifo_size = av_audio_fifo_size(_fifo);
+    if (fifo_size < sample_size)
+        return false;
+    // fill linesize
+    av_samples_get_buffer_size(frame->linesize, _channels, sample_size, _format, 0);
+    frame->nb_samples = sample_size;
+    frame->format = _format;
+    frame->channel_layout = av_get_default_channel_layout(_channels);
+    frame->sample_rate = _samplerate;
+    // A zero _tsp means no timestamp base is known.
+    const bool has_tsp = fabs(_tsp) > DBL_EPSILON;
+    frame->pts = has_tsp ? (int64_t)_tsp : AV_NOPTS_VALUE;
+
+    int ret = av_frame_get_buffer(frame, 0);
+    if (ret < 0) {
+        WarnL << "av_frame_get_buffer error " << ffmpeg_err(ret);
+        return false;
+    }
+
+    ret = av_audio_fifo_read(_fifo, (void **)frame->data, sample_size);
+    if (ret < 0) {
+        WarnL << "av_audio_fifo_read error " << ffmpeg_err(ret);
+        return false;
+    }
+    // Advance the base timestamp only after the samples really left the FIFO,
+    // so a failure above does not desynchronise pts from the buffered audio.
+    if (has_tsp) {
+        _tsp += sample_size * _timebase;
+    }
+    return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
 FFmpegSwr::FFmpegSwr(AVSampleFormat output, int channel, int channel_layout, int samplerate) {
    _target_format = output;
    _target_channels = channel;
@ -694,5 +778,334 @@ FFmpegFrame::Ptr FFmpegSws::inputFrame(const FFmpegFrame::Ptr &frame, int &ret,
    return nullptr;
 }

+///////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Applies the encoder settings shared by audio and video contexts:
+// low-delay/fast flags, experimental compliance, a 1/1000 time base and the bitrate.
+void setupContext(AVCodecContext *_context, int bitrate) {
+#ifdef FF_API_OLD_ENCDEC
+    // Keep references to AVFrames (legacy encode/decode API only).
+    _context->refcounted_frames = 1;
+#endif
+    _context->bit_rate = bitrate;
+    _context->time_base = AVRational{ 1, 1000 };
+    _context->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
+    _context->flags2 |= AV_CODEC_FLAG2_FAST;
+    _context->flags |= AV_CODEC_FLAG_LOW_DELAY;
+}
+
+/**
+ * Create an encoder matching the codec of `track`.
+ * For H264/H265 a list of preferred encoders is tried first and the default
+ * encoder for the codec id is used as a fallback when opening fails.
+ * @param track      describes the target codec (and size / sample rate / bitrate)
+ * @param thread_num encoder thread count; <= 0 selects "auto"
+ * @throws std::runtime_error when no encoder is found or none can be opened
+ */
+FFmpegEncoder::FFmpegEncoder(const Track::Ptr &track, int thread_num) {
+    setupFFmpeg();
+    const AVCodec *codec = nullptr;
+    const AVCodec *codec_default = nullptr;
+    _codecId = track->getCodecId();
+    switch (_codecId) {
+    case CodecH264:
+        codec_default = getCodec<false>({ AV_CODEC_ID_H264 });
+        if (checkIfSupportedNvidia()) {
+            codec = getCodec<false>({ { "libopenh264" },
+                                      { AV_CODEC_ID_H264 },
+                                      { "h264_qsv" },
+                                      { "h264_videotoolbox" },
+                                      { "h264_cuvid" },
+                                      { "h264_nvmpi" } });
+        } else {
+            codec = getCodec<false>({ { "libopenh264" }, { AV_CODEC_ID_H264 }, { "h264_qsv" }, { "h264_videotoolbox" }, { "h264_nvmpi" } });
+        }
+        break;
+    case CodecH265:
+        codec_default = getCodec<false>({ AV_CODEC_ID_HEVC });
+        if (checkIfSupportedNvidia()) {
+            codec = getCodec<false>({ { AV_CODEC_ID_HEVC }, { "hevc_qsv" }, { "hevc_videotoolbox" }, { "hevc_cuvid" }, { "hevc_nvmpi" } });
+        } else {
+            codec = getCodec<false>({ { AV_CODEC_ID_HEVC }, { "hevc_qsv" }, { "hevc_videotoolbox" }, { "hevc_nvmpi" } });
+        }
+        break;
+    case CodecAAC:
+        codec = getCodec<false>({ AV_CODEC_ID_AAC });
+        break;
+    case CodecG711A:
+        codec = getCodec<false>({ AV_CODEC_ID_PCM_ALAW });
+        break;
+    case CodecG711U:
+        codec = getCodec<false>({ AV_CODEC_ID_PCM_MULAW });
+        break;
+    case CodecOpus:
+        codec = getCodec<false>({ AV_CODEC_ID_OPUS });
+        break;
+    case CodecVP8:
+        codec = getCodec<false>({ AV_CODEC_ID_VP8 });
+        break;
+    case CodecVP9:
+        codec = getCodec<false>({ AV_CODEC_ID_VP9 });
+        break;
+    default:
+        break;
+    }
+
+    if (!codec) {
+        throw std::runtime_error("未找到编码器");
+    }
+
+    if (thread_num <= 0) {
+        av_dict_set(&_dict, "threads", "auto", 0);
+    } else {
+        av_dict_set(&_dict, "threads", to_string(MIN(thread_num, thread::hardware_concurrency())).data(), 0);
+    }
+    av_dict_set(&_dict, "zerolatency", "1", 0);
+    if (strcmp(codec->name, "libx264") == 0 || strcmp(codec->name, "libx265") == 0) {
+        av_dict_set(&_dict, "preset", "ultrafast", 0);
+    }
+
+    while (true) {
+        // openVideoCodec/openAudioCodec only report success or failure, not an AVERROR code.
+        bool ret = false;
+        if (getTrackType() == TrackVideo) {
+            // Without AV_CODEC_FLAG_GLOBAL_HEADER, SPS/PPS are stored once before the first I-frame.
+            // With it, I-frames carry no SPS/PPS but _context->extradata is populated and
+            // has to be handled manually.
+            // _context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+            VideoTrack::Ptr video = static_pointer_cast<VideoTrack>(track);
+            ret = openVideoCodec(video->getVideoWidth(), video->getVideoHeight(), track->getBitRate(), codec);
+        } else {
+            AudioTrack::Ptr audio = static_pointer_cast<AudioTrack>(track);
+            ret = openAudioCodec(audio->getAudioSampleRate(), audio->getAudioChannel(), track->getBitRate(), codec);
+        }
+
+        if (ret) {
+            _codec = codec;
+            // Success.
+            InfoL << "打开编码器成功:" << codec->name << ", frameSize " << _context->frame_size;
+            // we do not send complete frames, check this
+            if (getTrackType() == TrackAudio) {
+                var_frame_size = codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE;
+                if (var_frame_size) {
+                    InfoL << codec->name << " support var frame_size";
+                }
+            }
+            break;
+        }
+
+        if (codec_default && codec_default != codec) {
+            // The preferred encoder could not be opened; retry with the default one.
+            // No error code is available here (ret is a bool), so none is printed.
+            WarnL << "打开编码器" << codec->name << "失败, 再尝试打开编码器" << codec_default->name;
+            codec = codec_default;
+            continue;
+        }
+        throw std::runtime_error(StrPrinter << "打开编码器" << codec->name << "失败");
+    }
+}
+
+FFmpegEncoder::~FFmpegEncoder() {
+    stopThread(true); // stop the encode thread (if any) before draining
+    flush(); // drain packets still buffered inside the encoder
+    av_dict_free(&_dict); // release the option dictionary filled by the constructor
+}
+
+/**
+ * Allocate a fresh codec context for `codec` and open it as a video encoder.
+ * @return true when avcodec_open2() succeeds, false otherwise
+ */
+bool FFmpegEncoder::openVideoCodec(int width, int height, int bitrate, const AVCodec *codec) {
+    _context.reset(avcodec_alloc_context3(codec), [](AVCodecContext *ctx) { avcodec_free_context(&ctx); });
+    if (!_context) {
+        return false;
+    }
+    setupContext(_context.get(), bitrate);
+
+    // Without AV_CODEC_FLAG_GLOBAL_HEADER, SPS/PPS are stored once before the first I-frame.
+    // With it, I-frames carry no SPS/PPS but _context->extradata is populated and
+    // has to be handled manually.
+    // _context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+    _context->width = width;
+    _context->height = height;
+    _context->pix_fmt = AV_PIX_FMT_YUV420P; // codec->pix_fmts[0];
+    // GOP length
+    _context->gop_size = 200;
+    // B-frames disabled
+    _context->has_b_frames = 0;
+    _context->max_b_frames = 0;
+    InfoL << "openVideoCodec " << codec->name << " " << _context->width << "x" << _context->height;
+    // sws_.reset(new FFmpegSws(_context->pix_fmt, _context->width, _context->height));
+    const int open_ret = avcodec_open2(_context.get(), codec, &_dict);
+    return open_ret >= 0;
+}
+
+/**
+ * Allocate a fresh codec context for `codec` and open it as an audio encoder.
+ * The encoder's first advertised sample format is used, and a resampler
+ * converting input frames to that format/rate/layout is created.
+ * @return true when avcodec_open2() succeeds, false otherwise
+ */
+bool FFmpegEncoder::openAudioCodec(int samplerate, int channel, int bitrate, const AVCodec *codec) {
+    _context.reset(avcodec_alloc_context3(codec), [](AVCodecContext *ctx) { avcodec_free_context(&ctx); });
+
+    if (_context) {
+        // AVCodec::sample_fmts may be NULL when the codec does not advertise its formats.
+        if (!codec->sample_fmts) {
+            WarnL << "openAudioCodec " << codec->name << " has no sample format list";
+            return false;
+        }
+        setupContext(_context.get(), bitrate);
+
+        _context->sample_fmt = codec->sample_fmts[0];
+        _context->sample_rate = samplerate;
+        _context->channels = channel;
+        _context->channel_layout = av_get_default_channel_layout(_context->channels);
+
+        if (getCodecId() == CodecOpus)
+            _context->compression_level = 1;
+
+        //_sample_bytes = av_get_bytes_per_sample(_context->sample_fmt) * _context->channels;
+        _swr.reset(
+            new FFmpegSwr(_context->sample_fmt, _context->channels, _context->channel_layout, _context->sample_rate));
+
+        InfoL << "openAudioCodec " << codec->name << " " << _context->sample_rate << "x" << _context->channels;
+        return avcodec_open2(_context.get(), codec, &_dict) >= 0;
+    }
+    return false;
+}
+
+/**
+ * Drain the encoder: switch it to draining mode when it asks for more input,
+ * then forward every remaining packet to onEncode() until EOF or an error.
+ */
+void FFmpegEncoder::flush() {
+    if (!_context) {
+        return;
+    }
+    while (true) {
+        auto packet = alloc_av_packet();
+        auto ret = avcodec_receive_packet(_context.get(), packet.get());
+        if (ret == AVERROR(EAGAIN)) {
+            // Encoder wants more input: send a null frame to start draining.
+            // Stop if that fails, otherwise this loop could spin forever.
+            auto send_ret = avcodec_send_frame(_context.get(), nullptr);
+            if (send_ret < 0) {
+                WarnL << "avcodec_send_frame(flush) failed:" << ffmpeg_err(send_ret);
+                break;
+            }
+            continue;
+        }
+        if (ret == AVERROR_EOF) {
+            break;
+        }
+        if (ret < 0) {
+            WarnL << "avcodec_receive_packet failed:" << ffmpeg_err(ret);
+            break;
+        }
+        onEncode(packet.get());
+    }
+}
+
+/**
+ * Feed a decoded frame to the encoder.
+ * @param frame frame to encode
+ * @param async when true, video frames are handed to the encode thread
+ *              (started on demand); otherwise encoding happens inline
+ */
+bool FFmpegEncoder::inputFrame(const FFmpegFrame::Ptr &frame, bool async) {
+    if (async) {
+        // Async encoding requested for video: try to start the encode thread.
+        if (!TaskManager::isEnabled() && getContext()->codec_type == AVMEDIA_TYPE_VIDEO) {
+            startThread("encoder thread");
+        }
+        if (TaskManager::isEnabled()) {
+            return addEncodeTask([this, frame]() { inputFrame_l(frame); });
+        }
+    }
+    return inputFrame_l(frame);
+}
+
+/**
+ * Encode one frame on the calling thread.
+ * Audio is resampled to the encoder's format; when the encoder needs a fixed
+ * frame size, samples are re-chunked through an FFmpegAudioFifo.
+ * Video whose format/size differs from the context is converted via _sws when
+ * present, otherwise the video codec is reopened with the new size.
+ * @return false when the input is unusable or encoding fails
+ */
+bool FFmpegEncoder::inputFrame_l(FFmpegFrame::Ptr input) {
+    if (!input) {
+        return false;
+    }
+    AVFrame *frame = input->get();
+    AVCodecContext *context = _context.get();
+    if (getTrackType() == TrackAudio) {
+        if (_swr) {
+            // Convert to the encoder's sample format, rate and channel layout.
+            input = _swr->inputFrame(input);
+            if (!input) {
+                // Guard in case the resampler produced no frame.
+                WarnL << "audio resample failed, drop frame";
+                return false;
+            }
+            frame = input->get();
+            // Samples already queued must be encoded before any newer ones, so keep
+            // going through the FIFO while it is non-empty even if this frame happens
+            // to have exactly frame_size samples.
+            const bool fifo_pending = _fifo && _fifo->size() > 0;
+            // Make sure the encoder always receives exactly one full audio frame.
+            if (!var_frame_size && _context->frame_size
+                && (fifo_pending || frame->nb_samples != _context->frame_size)) {
+                if (!_fifo)
+                    _fifo.reset(new FFmpegAudioFifo());
+                // TraceL << "in " << frame->pts << ",samples " << frame->nb_samples;
+                _fifo->Write(frame);
+                while (true) {
+                    FFmpegFrame audio_frame;
+                    if (!_fifo->Read(audio_frame.get(), _context->frame_size)) {
+                        break;
+                    }
+                    if (!encodeFrame(audio_frame.get())) {
+                        break;
+                    }
+                }
+                return true;
+            }
+        }
+    } else {
+        if (frame->format != context->pix_fmt || frame->width != context->width || frame->height != context->height) {
+            if (_sws) {
+                input = _sws->inputFrame(input);
+                if (!input) {
+                    // Guard in case the scaler produced no frame.
+                    WarnL << "video scale failed, drop frame";
+                    return false;
+                }
+                frame = input->get();
+            } else {
+                // @todo reopen videocodec?
+                if (!openVideoCodec(frame->width, frame->height, 512000, _codec)) {
+                    WarnL << "reopen video codec failed " << frame->width << "x" << frame->height;
+                    return false;
+                }
+            }
+        }
+    }
+    return encodeFrame(frame);
+}
+
+/**
+ * Send one frame to the encoder and forward every packet it yields to onEncode().
+ * @return false when sending or receiving reports an error; true once the
+ *         encoder needs more input (EAGAIN) or signals EOF
+ */
+bool FFmpegEncoder::encodeFrame(AVFrame *frame) {
+    // TraceL << "enc " << frame->pts;
+    const int send_ret = avcodec_send_frame(_context.get(), frame);
+    if (send_ret < 0) {
+        WarnL << "Error sending a frame " << frame->pts << " to the encoder: " << ffmpeg_err(send_ret);
+        return false;
+    }
+    for (;;) {
+        auto packet = alloc_av_packet();
+        const int recv_ret = avcodec_receive_packet(_context.get(), packet.get());
+        if (recv_ret == AVERROR(EAGAIN) || recv_ret == AVERROR_EOF) {
+            return true;
+        }
+        if (recv_ret < 0) {
+            WarnL << "Error encoding a frame: " << ffmpeg_err(recv_ret);
+            return false;
+        }
+        // TraceL << "out " << packet->pts << "," << packet->dts << ", size: " << packet->size;
+        onEncode(packet.get());
+    }
+}
+
+/**
+ * Wrap an encoded packet into a Frame of the matching codec and hand it to the
+ * callback registered with setOnEncode(). Does nothing when no callback is set
+ * or the codec is not handled here.
+ */
+void FFmpegEncoder::onEncode(AVPacket *packet) {
+    if (!_cb)
+        return;
+    const char *payload = (const char *)packet->data;
+    const auto payload_size = packet->size;
+    switch (_codecId) {
+        case CodecH264: {
+            auto frame = FrameImp::create<H264Frame>();
+            frame->_dts = packet->dts;
+            frame->_pts = packet->pts;
+            frame->_buffer.assign(payload, payload_size);
+            frame->_prefix_size = prefixSize(payload, payload_size);
+            _cb(frame);
+            break;
+        }
+        case CodecH265: {
+            auto frame = FrameImp::create<H265Frame>();
+            frame->_dts = packet->dts;
+            frame->_pts = packet->pts;
+            frame->_buffer.assign(payload, payload_size);
+            frame->_prefix_size = prefixSize(payload, payload_size);
+            _cb(frame);
+            break;
+        }
+        case CodecAAC: {
+            auto frame = FrameImp::create<>();
+            frame->_codec_id = _codecId;
+            frame->_dts = packet->dts;
+            frame->_pts = packet->pts;
+            frame->_buffer.reserve(ADTS_HEADER_LEN + payload_size);
+            // Prepend an ADTS header built from the encoder's extradata, when available.
+            const bool has_config = _context && _context->extradata && _context->extradata_size;
+            if (has_config) {
+                uint8_t adts[ADTS_HEADER_LEN];
+                auto cfg = std::string((const char *)_context->extradata, _context->extradata_size);
+                dumpAacConfig(cfg, payload_size, adts, ADTS_HEADER_LEN);
+                frame->_prefix_size = ADTS_HEADER_LEN;
+                frame->_buffer.append((char*)adts, ADTS_HEADER_LEN);
+            }
+            frame->_buffer.append(payload, payload_size);
+            _cb(frame);
+            break;
+        }
+        // These codecs are forwarded as the raw packet payload.
+        case CodecOpus:
+        case CodecG711A:
+        case CodecG711U:
+        case CodecVP8:
+        case CodecVP9: {
+            auto frame = FrameImp::create<>();
+            frame->_codec_id = _codecId;
+            frame->_dts = packet->dts;
+            frame->_pts = packet->pts;
+            frame->_buffer.assign(payload, payload_size);
+            _cb(frame);
+            break;
+        }
+        default: break;
+    }
+}
+
 } //namespace mediakit
 #endif//ENABLE_FFMPEG
--- a/src/Codec/Transcode.h
+++ b/src/Codec/Transcode.h
@ -63,6 +63,24 @@ private:
    SwrContext *_ctx = nullptr;
 };

+/**
+ * Sample FIFO on top of AVAudioFifo that lets callers write frames of any
+ * size and read back frames of a fixed size, while tracking the timestamp
+ * of the oldest buffered sample.
+ */
+class FFmpegAudioFifo {
+public:
+    FFmpegAudioFifo() = default;
+    ~FFmpegAudioFifo();
+
+    // The object owns a raw AVAudioFifo*; copying would free it twice.
+    FFmpegAudioFifo(const FFmpegAudioFifo &) = delete;
+    FFmpegAudioFifo &operator=(const FFmpegAudioFifo &) = delete;
+
+    // Queue the samples of `frame`; returns false on failure.
+    bool Write(const AVFrame *frame);
+    // Pop `sample_size` samples into `frame`; returns false when not enough are buffered.
+    bool Read(AVFrame *frame, int sample_size);
+    // Number of samples currently buffered.
+    int size() const;
+
+private:
+    int _channels = 0;
+    int _samplerate = 0;
+    // Timestamp of the first buffered sample; 0 means "unknown".
+    double _tsp = 0;
+    // Duration of one sample, in timestamp units.
+    double _timebase = 0;
+    AVAudioFifo *_fifo = nullptr;
+    AVSampleFormat _format = AV_SAMPLE_FMT_NONE;
+};
+
 class TaskManager {
 public:
    TaskManager() = default;
@ -108,7 +126,7 @@ public:
    bool inputFrame(const Frame::Ptr &frame, bool live, bool async, bool enable_merge = true);
    void setOnDecode(onDec cb);
    void flush();
-    const AVCodecContext *getContext() const;
+    const AVCodecContext *getContext() const { return _context.get(); }

 private:
    void onDecode(const FFmpegFrame::Ptr &frame);
@ -145,6 +163,41 @@ private:
    AVPixelFormat _target_format = AV_PIX_FMT_NONE;
 };

+/**
+ * FFmpeg-based encoder that turns decoded FFmpegFrame objects into encoded
+ * Frame objects delivered through the setOnEncode() callback.
+ */
+class FFmpegEncoder : public TaskManager, public CodecInfo {
+public:
+    using Ptr = std::shared_ptr<FFmpegEncoder>;
+    using onEnc = std::function<void(const Frame::Ptr &)>;
+
+    // Throws std::runtime_error when no suitable encoder can be opened.
+    FFmpegEncoder(const Track::Ptr &track, int thread_num = 2);
+    ~FFmpegEncoder() override;
+
+    // Drain packets still buffered in the encoder.
+    void flush();
+    CodecId getCodecId() const override { return _codecId; }
+    const AVCodecContext *getContext() const { return _context.get(); }
+
+    // Register the callback that receives every encoded frame.
+    void setOnEncode(onEnc cb) { _cb = std::move(cb); }
+    // Encode `frame`, inline or on the encode thread when `async` is set.
+    bool inputFrame(const FFmpegFrame::Ptr &frame, bool async);
+
+private:
+    bool inputFrame_l(FFmpegFrame::Ptr frame);
+    bool encodeFrame(AVFrame *frame);
+    void onEncode(AVPacket *packet);
+    bool openVideoCodec(int width, int height, int bitrate, const AVCodec *codec);
+    bool openAudioCodec(int samplerate, int channel, int bitrate, const AVCodec *codec);
+
+private:
+    onEnc _cb;
+    // Set by the constructor from the track; initialised so it never holds garbage.
+    CodecId _codecId = CodecInvalid;
+    const AVCodec *_codec = nullptr;
+    AVDictionary *_dict = nullptr;
+    std::shared_ptr<AVCodecContext> _context;
+
+    std::unique_ptr<FFmpegSws> _sws;
+    std::unique_ptr<FFmpegSwr> _swr;
+    std::unique_ptr<FFmpegAudioFifo> _fifo;
+    // True when the opened audio codec accepts frames of any size.
+    bool var_frame_size = false;
+};
+
 }//namespace mediakit
 #endif// ENABLE_FFMPEG
 #endif //ZLMEDIAKIT_TRANSCODE_H
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -33,7 +33,14 @@ foreach(TEST_SRC ${TEST_SRC_LIST})
    endif()
  endif()

-  message(STATUS "add test: ${TEST_EXE_NAME}")
+  if (NOT ENABLE_FFMPEG)
+    # Skip the test that depends on FFmpeg when FFmpeg support is disabled
+    if ("${TEST_EXE_NAME}" MATCHES "test_audio_transcode")
+      continue()
+    endif ()
+  endif ()
+
+  message(STATUS "add test:${TEST_EXE_NAME}")
  add_executable(${TEST_EXE_NAME} ${TEST_SRC})
  target_compile_options(${TEST_EXE_NAME}
    PRIVATE ${COMPILE_OPTIONS_DEFAULT})
--- a/tests/test_audio_transcode.cpp
+++ b/tests/test_audio_transcode.cpp
@ -0,0 +1,118 @@
+#include "Codec/Transcode.h"
+#include "Record/MP4Demuxer.h"
+#include "Record/MP4Muxer.h"
+#include "Extension/AAC.h"
+#include "Extension/Opus.h"
+#include "Extension/G711.h"
+#include "Util/logger.h"
+using namespace mediakit;
+// One transcoding target: an encoder for `codec` whose output is muxed into
+// "<prefix>_<codec>.mp4". For an unsupported codec the context stays empty.
+struct TransCtx {
+    using Ptr = std::shared_ptr<TransCtx>;
+    TransCtx(const char *prefix, CodecId codec) {
+        // Build the path with std::string so a long prefix cannot overflow a fixed buffer.
+        std::string path(prefix);
+        Track::Ptr track;
+        switch (codec) {
+        case CodecAAC:
+            track.reset(new AACTrack(44100, 1));
+            path += "_aac.mp4";
+            break;
+        case CodecOpus:
+            track.reset(new OpusTrack());
+            path += "_opus.mp4";
+            break;
+        case CodecG711A:
+        case CodecG711U:
+            track.reset(new G711Track(codec, 8000, 1, 16));
+            path += "_711";
+            path += (codec == CodecG711A ? 'A' : 'U');
+            path += ".mp4";
+            break;
+        default:
+            return;
+        }
+        file.reset(new MP4Muxer());
+        file->openMP4(path.c_str());
+        file->addTrack(track);
+        enc.reset(new FFmpegEncoder(track));
+        enc->setOnEncode([this](const Frame::Ptr &frame) { file->inputFrame(frame); });
+    }
+    ~TransCtx() {
+        // Destroy the encoder first: it may still emit frames into `file`.
+        enc = nullptr;
+        file = nullptr;
+    }
+    void inputFrame(const FFmpegFrame::Ptr &frame) {
+        // enc is null when the constructor was given an unsupported codec.
+        if (enc) {
+            enc->inputFrame(frame, false);
+        }
+    }
+    FFmpegEncoder::Ptr enc;
+    std::shared_ptr<MP4Muxer> file;
+};
+
+/**
+ * Decode the audio track of `srcPath` and re-encode it.
+ * @param srcPath source MP4 file
+ * @param dstPath either a codec name (single target, output path derived from
+ *                srcPath) or an output prefix (one file per supported codec
+ *                other than the source codec)
+ * @return 0 on success or when nothing needs transcoding, -1 when the source
+ *         has no audio track
+ */
+int TranscodeAudio(const char *srcPath, const char *dstPath) {
+    MP4Demuxer srcMp4;
+    srcMp4.openMP4(srcPath);
+
+    auto srcTrack = srcMp4.getTrack(TrackAudio);
+    if (!srcTrack) {
+        printf("unable to find audioTrack %s\n", srcPath);
+        return -1;
+    }
+    std::vector<TransCtx::Ptr> trans;
+    FFmpegDecoder audioDec(srcTrack);
+    auto dstCodec = getCodecId(dstPath);
+    if (!strcasecmp(dstPath, "aac"))
+        dstCodec = CodecAAC;
+    if (dstCodec != CodecInvalid) {
+        if (dstCodec == srcTrack->getCodecId()) {
+            printf("same codec %s, skip transcode\n", dstPath);
+            return 0;
+        }
+        // Derive the output prefix from the source path: cut at the last '_', else at the last '.'.
+        std::string dstFile(srcPath);
+        auto pos = dstFile.rfind('_');
+        if (pos == dstFile.npos)
+            pos = dstFile.rfind('.');
+        if (pos != dstFile.npos)
+            dstFile = dstFile.substr(0, pos);
+        trans.push_back(std::make_shared<TransCtx>(dstFile.c_str(), dstCodec));
+    }
+    else {
+        for (auto codec : { CodecAAC, CodecOpus, CodecG711A, CodecG711U }) {
+            if (codec == srcTrack->getCodecId())
+                continue;
+            trans.push_back(std::make_shared<TransCtx>(dstPath, codec));
+        }
+    }
+    // srcTrack -> audioDec
+    srcTrack->addDelegate([&](const Frame::Ptr &frame) -> bool {
+        audioDec.inputFrame(frame, true, false, false);
+        return true;
+    });
+    // audioDec -> audioEnc
+    audioDec.setOnDecode([&](const FFmpegFrame::Ptr &frame) {
+        // Iterate by reference: no shared_ptr copy per target per decoded frame.
+        for (const auto &ctx : trans)
+            ctx->inputFrame(frame);
+    });
+    toolkit::Ticker tick;
+    printf("startReadMp4 %" PRIu64 "ms\n", srcMp4.getDurationMS());
+    bool key = false;
+    bool eof = false;
+    while (true) {
+        // srcMp4->srcTrack; the returned frame itself is not needed here.
+        srcMp4.readFrame(key, eof);
+        if (eof) {
+            printf("eof break loop, it tooks %" PRIu64 " ms\n", tick.elapsedTime());
+            break;
+        }
+    }
+    return 0;
+}
+
+// Entry point: test_audio_transcode <src.mp4> <dst_prefix|codecName>
+int main(int argc, char *argv[]) {
+    if (argc != 3) {
+        printf("usage src.mp4 dst_prefix/codecName\n");
+        return 0;
+    }
+    toolkit::Logger::Instance().add(std::make_shared<toolkit::ConsoleChannel>());
+    try {
+        return TranscodeAudio(argv[1], argv[2]);
+    } catch (const std::exception &e) {
+        // Catch by const reference so the dynamic type (and its what()) is not sliced away.
+        printf("exception: %s\n", e.what());
+        return -1;
+    }
+}