增加FFmpegEncoder和test_audio_transcode, 主要修改有:

- 使用FFmpegAudioFifo来简化代码 - 修复转码时间戳不规则递增，导致的音频破音和不连续问题 - 使用double来保存_tsp，避免出现2.5ms的frame_size(ffmpeg内置的opus)
2024-11-22 19:00:01 +08:00 · 2022-05-31 17:10:02 +08:00 · 2022-05-31 17:10:02 +08:00 · 5c92b4e48f
commit 5c92b4e48f
parent b33c227cf4
4 changed files with 611 additions and 20 deletions
--- a/src/Codec/Transcode.cpp
+++ b/src/Codec/Transcode.cpp
@ -13,10 +13,17 @@
 #include <dlfcn.h>
 #endif
 #include "Util/File.h"
 #include "Util/util.h"
 #include "Util/uv_errno.h"
 #include <float.h>
 #include "Transcode.h"
 #include "Extension/AAC.h"
 #include "Common/config.h"
 #include "Extension/Opus.h"
 #include "Extension/G711.h"
 #include "Extension/H264.h"
 #include "Extension/H265.h"
 #define MAX_DELAY_SECOND 3
 using namespace std;
@ -66,6 +73,9 @@ static bool setupFFmpeg_l() {
 #if (LIBAVCODEC_VERSION_MAJOR < 58)
    avcodec_register_all();
 #endif
    InfoL << "libavcodec " << LIBAVCODEC_VERSION_MAJOR << "." << LIBAVCODEC_VERSION_MINOR;
    //InfoL << "libavformat " << LIBAVFORMAT_VERSION_MAJOR << "." << LIBAVFORMAT_VERSION_MINOR;
    InfoL << "libavutil " << LIBAVUTIL_VERSION_MAJOR << "." << LIBAVUTIL_VERSION_MAJOR;
    return true;
 }
@ -411,21 +421,23 @@ FFmpegDecoder::FFmpegDecoder(const Track::Ptr &track, int thread_num, const std:
        _context->flags |= AV_CODEC_FLAG_LOW_DELAY;
        _context->flags2 |= AV_CODEC_FLAG2_FAST;
        if (track->getTrackType() == TrackVideo) {
-            _context->width = static_pointer_cast<VideoTrack>(track)->getVideoWidth();
+            auto video = static_pointer_cast<VideoTrack>(track);
-            _context->height = static_pointer_cast<VideoTrack>(track)->getVideoHeight();
+            _context->width = video->getVideoWidth();
-        }
+            _context->height = video->getVideoHeight();
-
+            InfoL << "decode video " << video->getCodecName() << " " << _context->width << "x" << _context->height;
-        switch (track->getCodecId()) {
+        } else {
-            case CodecG711A:
+            auto audio = static_pointer_cast<AudioTrack>(track);
-            case CodecG711U: {
+            InfoL << "decode audio " << audio->getCodecName() << " " << audio->getAudioSampleRate() << "x" << audio->getAudioChannel();
-                AudioTrack::Ptr audio = static_pointer_cast<AudioTrack>(track);
+            switch (track->getCodecId()) {
-                _context->channels = audio->getAudioChannel();
+                case CodecG711A:
-                _context->sample_rate = audio->getAudioSampleRate();
+                case CodecG711U: {
-                _context->channel_layout = av_get_default_channel_layout(_context->channels);
+                    _context->channels = audio->getAudioChannel();
-                break;
+                    _context->sample_rate = audio->getAudioSampleRate();
                    _context->channel_layout = av_get_default_channel_layout(_context->channels);
                    break;
                }
                default: break;
            }
            default:
                break;
        }
        AVDictionary *dict = nullptr;
        if (thread_num <= 0) {
@ -491,10 +503,6 @@ void FFmpegDecoder::flush() {
    }
 }
 const AVCodecContext *FFmpegDecoder::getContext() const {
    return _context.get();
 }
 bool FFmpegDecoder::inputFrame_l(const Frame::Ptr &frame, bool live, bool enable_merge) {
    if (_do_merger && enable_merge) {
        return _merger.inputFrame(frame, [this, live](uint64_t dts, uint64_t pts, const Buffer::Ptr &buffer, bool have_idr) {
@ -575,6 +583,82 @@ void FFmpegDecoder::onDecode(const FFmpegFrame::Ptr &frame) {
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 FFmpegAudioFifo::~FFmpegAudioFifo() {
    if (_fifo) {
        av_audio_fifo_free(_fifo);
        _fifo = nullptr;
    }
 }
 int FFmpegAudioFifo::size() const {
    return _fifo ? av_audio_fifo_size(_fifo) : 0;
 }
 bool FFmpegAudioFifo::Write(const AVFrame *frame) {
    _format = (AVSampleFormat)frame->format;
    if (!_fifo) {
        _fifo = av_audio_fifo_alloc(_format, frame->channels, frame->nb_samples);
        if (!_fifo) {
            WarnL << "av_audio_fifo_alloc " << frame->channels << "x" << frame->nb_samples << "error";
            return false;
        }
    }
    _channels = frame->channels;
    if (_samplerate != frame->sample_rate) {
        _samplerate = frame->sample_rate;
        // 假定传入frame的时间戳是以ms为单位的
        _timebase = 1000.0 / _samplerate;
    }
    if (frame->pts != AV_NOPTS_VALUE) {
        // 计算fifo audio第一个采样的时间戳
        double tsp = frame->pts - _timebase * av_audio_fifo_size(_fifo);
        // flv.js和webrtc对音频时间戳增量有要求, rtc要求更加严格！
        // 得尽量保证时间戳是按照sample_size累加，否则容易出现破音或杂音等问题
        if (fabs(_tsp) < DBL_EPSILON || fabs(tsp - _tsp) > 200) {
            InfoL << "reset base_tsp " << (int64_t)_tsp << "->" << (int64_t)tsp;
            _tsp = tsp;
        }
    } else {
        _tsp = 0;
    }
    av_audio_fifo_write(_fifo, (void **)frame->data, frame->nb_samples);
    return true;
 }
 bool FFmpegAudioFifo::Read(AVFrame *frame, int sample_size) {
    assert(_fifo);
    int fifo_size = av_audio_fifo_size(_fifo);
    if (fifo_size < sample_size)
        return false;
    // fill linedata
    av_samples_get_buffer_size(frame->linesize, _channels, sample_size, _format, 0);
    frame->nb_samples = sample_size;
    frame->format = _format;
    frame->channel_layout = av_get_default_channel_layout(_channels);
    frame->sample_rate = _samplerate;
    if (fabs(_tsp) > DBL_EPSILON) {
        frame->pts = _tsp;
        // advance tsp by sample_size
        _tsp += sample_size * _timebase;
    }
    else {
        frame->pts = AV_NOPTS_VALUE;
    }
    int ret = av_frame_get_buffer(frame, 0);
    if (ret < 0) {
        WarnL << "av_frame_get_buffer error " << ffmpeg_err(ret);
        return false;
    }
    av_audio_fifo_read(_fifo, (void **)frame->data, sample_size);
    return true;
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 FFmpegSwr::FFmpegSwr(AVSampleFormat output, int channel, int channel_layout, int samplerate) {
    _target_format = output;
    _target_channels = channel;
@ -694,5 +778,334 @@ FFmpegFrame::Ptr FFmpegSws::inputFrame(const FFmpegFrame::Ptr &frame, int &ret,
    return nullptr;
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 void setupContext(AVCodecContext *_context, int bitrate) {
    //保存AVFrame的引用
 #ifdef FF_API_OLD_ENCDEC
    _context->refcounted_frames = 1;
 #endif
    _context->flags |= AV_CODEC_FLAG_LOW_DELAY;
    _context->flags2 |= AV_CODEC_FLAG2_FAST;
    _context->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
    _context->time_base.num = 1;
    _context->time_base.den = 1000; // {1, 1000}
    _context->bit_rate = bitrate;
 }
 FFmpegEncoder::FFmpegEncoder(const Track::Ptr &track, int thread_num) {
    setupFFmpeg();
    const AVCodec *codec = nullptr;
    const AVCodec *codec_default = nullptr;
    _codecId = track->getCodecId();
    switch (_codecId) {
    case CodecH264:
        codec_default = getCodec<false>({ AV_CODEC_ID_H264 });
        if (checkIfSupportedNvidia()) {
            codec = getCodec<false>({ { "libopenh264" },
                                      { AV_CODEC_ID_H264 },
                                      { "h264_qsv" },
                                      { "h264_videotoolbox" },
                                      { "h264_cuvid" },
                                      { "h264_nvmpi" } });
        } else {
            codec = getCodec<false>({ { "libopenh264" }, { AV_CODEC_ID_H264 }, { "h264_qsv" }, { "h264_videotoolbox" }, { "h264_nvmpi" } });
        }
        break;
    case CodecH265:
        codec_default = getCodec<false>({ AV_CODEC_ID_HEVC });
        if (checkIfSupportedNvidia()) {
            codec = getCodec<false>({ { AV_CODEC_ID_HEVC }, { "hevc_qsv" }, { "hevc_videotoolbox" }, { "hevc_cuvid" }, { "hevc_nvmpi" } });
        } else {
            codec = getCodec<false>({ { AV_CODEC_ID_HEVC }, { "hevc_qsv" }, { "hevc_videotoolbox" }, { "hevc_nvmpi" } });
        }
        break;
    case CodecAAC:
        codec = getCodec<false>({ AV_CODEC_ID_AAC });
        break;
    case CodecG711A:
        codec = getCodec<false>({ AV_CODEC_ID_PCM_ALAW });
        break;
    case CodecG711U:
        codec = getCodec<false>({ AV_CODEC_ID_PCM_MULAW });
        break;
    case CodecOpus:
        codec = getCodec<false>({ AV_CODEC_ID_OPUS });
        break;
    case CodecVP8:
        codec = getCodec<false>({ AV_CODEC_ID_VP8 });
        break;
    case CodecVP9:
        codec = getCodec<false>({ AV_CODEC_ID_VP9 });
        break;
    default:
        break;
    }
    if (!codec) {
        throw std::runtime_error("未找到编码器");
    }
    if (thread_num <= 0) {
        av_dict_set(&_dict, "threads", "auto", 0);
    } else {
        av_dict_set(&_dict, "threads", to_string(MIN(thread_num, thread::hardware_concurrency())).data(), 0);
    }
    av_dict_set(&_dict, "zerolatency", "1", 0);
    if (strcmp(codec->name, "libx264") == 0 || strcmp(codec->name, "libx265") == 0) {
        av_dict_set(&_dict, "preset", "ultrafast", 0);
    }
    while (true) {
        bool ret = false;
        if (getTrackType() == TrackVideo) {
            // 不设置时，仅第一个I帧前存一次sps和pps
            // 设置后，I帧钱不会存储sps和pps, 但_context->extradata会有数据，需要手动处理
            // _context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
            VideoTrack::Ptr video = static_pointer_cast<VideoTrack>(track);
            ret = openVideoCodec(video->getVideoWidth(), video->getVideoHeight(), track->getBitRate(), codec);
        } else {
            AudioTrack::Ptr audio = static_pointer_cast<AudioTrack>(track);
            ret = openAudioCodec(audio->getAudioSampleRate(), audio->getAudioChannel(), track->getBitRate(), codec);
        }
        if (ret) {
            _codec = codec;
            //成功
            InfoL << "打开编码器成功:" << codec->name << ", frameSize " << _context->frame_size;
            // we do not send complete frames, check this
            if (getTrackType() == TrackAudio) {
                var_frame_size = codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE;
                if (var_frame_size) {
                    InfoL << codec->name << " support var frame_size";
                }
            }
            break;
        }
        if (codec_default && codec_default != codec) {
            //硬件编解码器打开失败，尝试软件的
            WarnL << "打开编码器" << codec->name << "失败，原因是:" << ffmpeg_err(ret) << ", 再尝试打开编码器"
                  << codec_default->name;
            codec = codec_default;
            continue;
        }
        throw std::runtime_error(StrPrinter << "打开编码器" << codec->name << "失败:" << ffmpeg_err(ret));
    }
 }
 FFmpegEncoder::~FFmpegEncoder() {
    stopThread(true);
    flush();
    av_dict_free(&_dict);
 }
 bool FFmpegEncoder::openVideoCodec(int width, int height, int bitrate, const AVCodec *codec) {
    _context.reset(avcodec_alloc_context3(codec), [](AVCodecContext *ctx) { avcodec_free_context(&ctx); });
    if (_context) {
        setupContext(_context.get(), bitrate);
        // 不设置时，仅第一个I帧前存一次sps和pps
        // 设置后，I帧钱不会存储sps和pps, 但_context->extradata会有数据，需要手动处理
        // _context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
        _context->width = width;
        _context->height = height;
        // gop
        _context->gop_size = 200;
        // 禁用b帧
        _context->max_b_frames = 0;
        _context->has_b_frames = 0;
        InfoL << "openVideoCodec " << codec->name << " " << _context->width << "x" << _context->height;
        _context->pix_fmt = AV_PIX_FMT_YUV420P; // codec->pix_fmts[0];
        // sws_.reset(new FFmpegSws(_context->pix_fmt, _context->width, _context->height));
        return avcodec_open2(_context.get(), codec, &_dict) >= 0;
    }
    return false;
 }
 bool FFmpegEncoder::openAudioCodec(int samplerate, int channel, int bitrate, const AVCodec *codec) {
    _context.reset(avcodec_alloc_context3(codec), [](AVCodecContext *ctx) { avcodec_free_context(&ctx); });
    if (_context) {
        setupContext(_context.get(), bitrate);
        _context->sample_fmt = codec->sample_fmts[0];
        _context->sample_rate = samplerate;
        _context->channels = channel;
        _context->channel_layout = av_get_default_channel_layout(_context->channels);
        if (getCodecId() == CodecOpus)
            _context->compression_level = 1;
        //_sample_bytes = av_get_bytes_per_sample(_context->sample_fmt) * _context->channels;
        _swr.reset(
            new FFmpegSwr(_context->sample_fmt, _context->channels, _context->channel_layout, _context->sample_rate));
        InfoL << "openAudioCodec " << codec->name << " " << _context->sample_rate << "x" << _context->channels;
        return avcodec_open2(_context.get(), codec, &_dict) >= 0;
    }
    return false;
 }
 void FFmpegEncoder::flush() {
    while (true) {
        auto packet = alloc_av_packet();
        auto ret = avcodec_receive_packet(_context.get(), packet.get());
        if (ret == AVERROR(EAGAIN)) {
            avcodec_send_frame(_context.get(), nullptr);
            continue;
        }
        if (ret == AVERROR_EOF) {
            break;
        }
        if (ret < 0) {
            WarnL << "avcodec_receive_frame failed:" << ffmpeg_err(ret);
            break;
        }
        onEncode(packet.get());
    }
 }
 bool FFmpegEncoder::inputFrame(const FFmpegFrame::Ptr &frame, bool async) {
    if (async && !TaskManager::isEnabled() && getContext()->codec_type == AVMEDIA_TYPE_VIDEO) {
        //开启异步编码，且为视频，尝试启动异步解码线程
        startThread("encoder thread");
    }
    if (!async || !TaskManager::isEnabled()) {
        return inputFrame_l(frame);
    }
    return addEncodeTask([this, frame]() { inputFrame_l(frame); });
 }
 bool FFmpegEncoder::inputFrame_l(FFmpegFrame::Ptr input) {
    AVFrame *frame = input->get();
    AVCodecContext *context = _context.get();
    if (getTrackType() == TrackAudio) {
        if (_swr) {
            // 转成同样采样率和通道
            input = _swr->inputFrame(input);
            frame = input->get();
            // 保证每次塞给解码器的都是一帧音频
            if (!var_frame_size && _context->frame_size && frame->nb_samples != _context->frame_size) {
                // add this frame to _audio_buffer
                if (!_fifo)
                    _fifo.reset(new FFmpegAudioFifo());
                // TraceL << "in " << frame->pts << ",samples " << frame->nb_samples;
                _fifo->Write(frame);
                while (1) {
                    FFmpegFrame audio_frame;
                    if (!_fifo->Read(audio_frame.get(), _context->frame_size)){
                        break;
                    }
                    if (!encodeFrame(audio_frame.get())) {
                        break;
                    }
                }
                return true;
            }
        }
    } else {
        if (frame->format != context->pix_fmt || frame->width != context->width || frame->height != context->height) {
            if (_sws) {
                input = _sws->inputFrame(input);
                frame = input->get();
            } else {
                // @todo reopen videocodec?
                openVideoCodec(frame->width, frame->height, 512000, _codec);
            }
        }
    }
    return encodeFrame(frame);
 }
 bool FFmpegEncoder::encodeFrame(AVFrame *frame) {
    // TraceL << "enc " << frame->pts;
    int ret = avcodec_send_frame(_context.get(), frame);
    if (ret < 0) {
        WarnL << "Error sending a frame " << frame->pts << " to the encoder: " << ffmpeg_err(ret);
        return false;
    }
    while (ret >= 0) {
        auto packet = alloc_av_packet();
        ret = avcodec_receive_packet(_context.get(), packet.get());
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        else if (ret < 0) {
            WarnL << "Error encoding a frame: " << ffmpeg_err(ret);
            return false;
        }
        // TraceL << "out " << packet->pts << "," << packet->dts << ", size: " << packet->size;
        onEncode(packet.get());
    }
    return true;
 }
 void FFmpegEncoder::onEncode(AVPacket *packet) {
    // process frame
    if (!_cb)
        return;
    switch (_codecId) {
        case CodecH264: {
            auto frame = FrameImp::create<H264Frame>();
            frame->_dts = packet->dts;
            frame->_pts = packet->pts;
            frame->_buffer.assign((const char *)packet->data, packet->size);
            frame->_prefix_size = prefixSize((const char *)packet->data, packet->size);
            _cb(frame);
            break;
        }
        case CodecH265: {
            auto frame = FrameImp::create<H265Frame>();
            frame->_dts = packet->dts;
            frame->_pts = packet->pts;
            frame->_buffer.assign((const char *)packet->data, packet->size);
            frame->_prefix_size = prefixSize((const char *)packet->data, packet->size);
            _cb(frame);
            break;
        }
        case CodecAAC: {
            auto frame = FrameImp::create<>();
            frame->_codec_id = _codecId;
            frame->_dts = packet->dts;
            frame->_pts = packet->pts;
            frame->_buffer.reserve(ADTS_HEADER_LEN + packet->size);
            if (_context && _context->extradata && _context->extradata_size) {
                uint8_t adts[ADTS_HEADER_LEN];
                auto cfg = std::string((const char *)_context->extradata, _context->extradata_size);
                dumpAacConfig(cfg, packet->size, adts, ADTS_HEADER_LEN);
                frame->_prefix_size = ADTS_HEADER_LEN;
                frame->_buffer.append((char*)adts, ADTS_HEADER_LEN);
            }
            frame->_buffer.append((const char *)packet->data, packet->size);
            _cb(frame);
            break;
        }
        case CodecOpus:
        case CodecG711A:
        case CodecG711U: {
            auto frame = FrameImp::create<>();
            frame->_codec_id = _codecId;
            frame->_dts = packet->dts;
            frame->_pts = packet->pts;
            frame->_buffer.assign((const char *)packet->data, packet->size);
            _cb(frame);
            break;
        }
        case CodecVP8:
        case CodecVP9: {
            auto frame = FrameImp::create<>();
            frame->_codec_id = _codecId;
            frame->_dts = packet->dts;
            frame->_pts = packet->pts;
            frame->_buffer.assign((const char *)packet->data, packet->size);
            _cb(frame);
            break;
        }
        default: break;
    }
 }
 } //namespace mediakit
 #endif//ENABLE_FFMPEG
--- a/src/Codec/Transcode.h
+++ b/src/Codec/Transcode.h
@ -63,6 +63,24 @@ private:
    SwrContext *_ctx = nullptr;
 };
 class FFmpegAudioFifo {
 public:
    FFmpegAudioFifo() = default;
    ~FFmpegAudioFifo();
    bool Write(const AVFrame *frame);
    bool Read(AVFrame *frame, int sample_size);
    int size() const;
 private:
    int _channels = 0;
    int _samplerate = 0;
    double _tsp = 0;
    double _timebase = 0;
    AVAudioFifo *_fifo = nullptr;
    AVSampleFormat _format = AV_SAMPLE_FMT_NONE;
 };
 class TaskManager {
 public:
    TaskManager() = default;
@ -108,7 +126,7 @@ public:
    bool inputFrame(const Frame::Ptr &frame, bool live, bool async, bool enable_merge = true);
    void setOnDecode(onDec cb);
    void flush();
-    const AVCodecContext *getContext() const;
+    const AVCodecContext *getContext() const { return _context.get(); }
 private:
    void onDecode(const FFmpegFrame::Ptr &frame);
@ -145,6 +163,41 @@ private:
    AVPixelFormat _target_format = AV_PIX_FMT_NONE;
 };
 class FFmpegEncoder : public TaskManager, public CodecInfo {
 public:
    using Ptr = std::shared_ptr<FFmpegEncoder>;
    using onEnc = std::function<void(const Frame::Ptr &)>;
    FFmpegEncoder(const Track::Ptr &track, int thread_num = 2);
    ~FFmpegEncoder() override;
    void flush();
    CodecId getCodecId() const override { return _codecId; }
    const AVCodecContext *getContext() const { return _context.get(); }
    void setOnEncode(onEnc cb) { _cb = std::move(cb); }
    bool inputFrame(const FFmpegFrame::Ptr &frame, bool async);
 private:
    bool inputFrame_l(FFmpegFrame::Ptr frame);
    bool encodeFrame(AVFrame *frame);
    void onEncode(AVPacket *packet);
    bool openVideoCodec(int width, int height, int bitrate, const AVCodec *codec);
    bool openAudioCodec(int samplerate, int channel, int bitrate, const AVCodec *codec);
 private:
    onEnc _cb;
    CodecId _codecId;
    const AVCodec *_codec = nullptr;
    AVDictionary *_dict = nullptr;
    std::shared_ptr<AVCodecContext> _context;
    std::unique_ptr<FFmpegSws> _sws;
    std::unique_ptr<FFmpegSwr> _swr;
    std::unique_ptr<FFmpegAudioFifo> _fifo;
    bool var_frame_size = false;
 };
 }//namespace mediakit
 #endif// ENABLE_FFMPEG
 #endif //ZLMEDIAKIT_TRANSCODE_H
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -33,7 +33,14 @@ foreach(TEST_SRC ${TEST_SRC_LIST})
    endif()
  endif()
-  message(STATUS "add test: ${TEST_EXE_NAME}")
+  if (NOT ENABLE_FFMPEG)
    # 暂时过滤掉依赖 WebRTC 的测试模块
    if ("${TEST_EXE_NAME}" MATCHES "test_audio_transcode")
      continue()
    endif ()
  endif ()
  message(STATUS "add test:${TEST_EXE_NAME}")
  add_executable(${TEST_EXE_NAME} ${TEST_SRC})
  target_compile_options(${TEST_EXE_NAME}
    PRIVATE ${COMPILE_OPTIONS_DEFAULT})
--- a/tests/test_audio_transcode.cpp
+++ b/tests/test_audio_transcode.cpp
@ -0,0 +1,118 @@
 #include "Codec/Transcode.h"
 #include "Record/MP4Demuxer.h"
 #include "Record/MP4Muxer.h"
 #include "Extension/AAC.h"
 #include "Extension/Opus.h"
 #include "Extension/G711.h"
 #include "Util/logger.h"
 using namespace mediakit;
 struct TransCtx {
    using Ptr = std::shared_ptr<TransCtx>;
    TransCtx(const char *prefix, CodecId codec) {
        char path[256];
        Track::Ptr track;
        switch (codec) {
        case CodecAAC:
            track.reset(new AACTrack(44100, 1));
            sprintf(path, "%s_aac.mp4", prefix);
            break;
        case CodecOpus:
            track.reset(new OpusTrack());
            sprintf(path, "%s_opus.mp4", prefix);
            break;
        case CodecG711A:
        case CodecG711U:
            track.reset(new G711Track(codec, 8000, 1, 16));
            sprintf(path, "%s_711%c.mp4", prefix, codec == CodecG711A ? 'A' : 'U');
            break;
        default:
            return;
            break;
        }
        file.reset(new MP4Muxer());
        file->openMP4(path);
        file->addTrack(track);
        enc.reset(new FFmpegEncoder(track));
        enc->setOnEncode([this](const Frame::Ptr &frame) { file->inputFrame(frame); });
    }
    ~TransCtx() {
        enc = nullptr;
        file = nullptr;
    }
    void inputFrame(const FFmpegFrame::Ptr &frame) { enc->inputFrame(frame, false); }
    FFmpegEncoder::Ptr enc;
    std::shared_ptr<MP4Muxer> file;
 };
 int TranscodeAudio(const char *srcPath, const char *dstPath) {
    MP4Demuxer srcMp4;
    srcMp4.openMP4(srcPath);
    auto srcTrack = srcMp4.getTrack(TrackAudio);
    if (!srcTrack) {
        printf("unable to find audioTrack %s\n", srcPath);
        return -1;
    }
    std::vector<TransCtx::Ptr> trans;
    FFmpegDecoder audioDec(srcTrack);
    auto dstCodec = getCodecId(dstPath);
    if (!strcasecmp(dstPath, "aac"))
        dstCodec = CodecAAC;
    if (dstCodec != CodecInvalid) {
        std::string dstFile(srcPath);
        auto pos = dstFile.rfind('_');
        if (pos == dstFile.npos)
            pos = dstFile.rfind('.');
        if (pos != dstFile.npos)
            dstFile = dstFile.substr(0, pos);
        if (dstCodec == srcTrack->getCodecId()) {
            printf("same codec %s, skip transcode\n", dstPath);
            return 0;
        }
        trans.push_back(std::make_shared<TransCtx>(dstFile.c_str(), dstCodec));
    }
    else {
        for (auto codec : { CodecAAC, CodecOpus, CodecG711A, CodecG711U }) {
            if (codec == srcTrack->getCodecId())
                continue;
            trans.push_back(std::make_shared<TransCtx>(dstPath, codec));
        }
    }
    // srcTrack -> audioDec
    srcTrack->addDelegate([&](const Frame::Ptr &frame) -> bool {
        audioDec.inputFrame(frame, true, false, false);
        return true;
    });
    // audioDec -> audioEnc
    audioDec.setOnDecode([&](const FFmpegFrame::Ptr &frame) {
        for (TransCtx::Ptr p : trans)
            p->inputFrame(frame);
    });
    toolkit::Ticker tick;
    printf("startReadMp4 %" PRIu64 "ms\n", srcMp4.getDurationMS());
    bool key, eof;
    Frame::Ptr frame;
    while (true) {
        // srcMp4->srcTrack
        frame = srcMp4.readFrame(key, eof);
        if (eof) {
            printf("eof break loop, it tooks %" PRIu64 " ms\n", tick.elapsedTime());
            break;
        }
    }
    return 0;
 }
 int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("usage src.mp4 dst_prefix/codecName\n");
        return 0;
    }
    toolkit::Logger::Instance().add(std::make_shared<toolkit::ConsoleChannel>());
    try {
        return TranscodeAudio(argv[1], argv[2]);
    } catch (std::exception e) {
        printf("exception: %s\n", e.what());
        return -1;
    }
 }