大多数厂家摄像机输出的音频流格式都是PCM,有一些场合(比如将音视频流保存成TS流)需要将PCM格式转成AAC格式。基本的思路是先解码得到音频帧,再将音频帧编码成AAC格式。编码和解码之间需要添加一个filter,filter起到适配的作用。
首先解码:
/*
 * Decode one compressed audio packet, push the raw frame through the
 * filtergraph, and return the first filtered frame (sized for the AAC
 * encoder), or nullptr when no filtered frame is available yet or on error.
 * The caller owns (and must free) the returned AVFrame.
 *
 * Relies on members: decoderContext, encoderContext, buffersrc_ctx,
 * buffersink_ctx, startTime — assumed initialized by initFilters()/setup
 * code elsewhere in this file.
 */
AVFrame *decode(AVPacket *sample)
{
    int gotFrame = 0;
    AVFrame *rawFrame = av_frame_alloc();
    AVFrame *filteredFrame = nullptr;

    auto decodedLen = avcodec_decode_audio4(decoderContext, rawFrame, &gotFrame, sample);
    rawFrame->pts = rawFrame->pkt_pts;

    if (decodedLen >= 0 && gotFrame != 0) {
        /* Feed the decoded frame into the source end of the graph. */
        if (av_buffersrc_add_frame_flags(buffersrc_ctx, rawFrame, AV_BUFFERSRC_FLAG_PUSH) < 0) {
            av_log(NULL, AV_LOG_ERROR, "Error while feeding the audio filtergraph\n");
            av_frame_free(&rawFrame);
            return nullptr;
        }
        rawFrame->pts = AV_NOPTS_VALUE;

        /* Pull filtered audio from the sink end of the graph. */
        filteredFrame = av_frame_alloc();
        for (;;) {
            int ret = av_buffersink_get_frame_flags(buffersink_ctx, filteredFrame,
                                                    AV_BUFFERSINK_FLAG_NO_REQUEST);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                break; /* not enough buffered samples for a full output frame yet */
            if (ret < 0) {
                av_frame_free(&rawFrame);
                av_frame_free(&filteredFrame);
                return nullptr;
            }

            int64_t frame_pts = AV_NOPTS_VALUE;
            if (filteredFrame->pts != AV_NOPTS_VALUE) {
                if (startTime == AV_NOPTS_VALUE)
                    startTime = 0;
                /* Rescale from the filter's time base to the encoder's,
                   shifting by startTime (expressed in AV_TIME_BASE units). */
                AVRational microsecondBase;
                microsecondBase.num = 1;
                microsecondBase.den = AV_TIME_BASE;
                filteredFrame->pts = frame_pts =
                    av_rescale_q(filteredFrame->pts,
                                 buffersink_ctx->inputs[0]->time_base,
                                 encoderContext->time_base)
                    - av_rescale_q(startTime, microsecondBase, encoderContext->time_base);
            }
            av_frame_free(&rawFrame);
            return filteredFrame;
        }
    }
    /* No output this round: release everything (av_frame_free tolerates null). */
    av_frame_free(&filteredFrame);
    av_frame_free(&rawFrame);
    return nullptr;
}
decode得到AVFrame,也即音频帧。这个frame不能直接作为编码的源,必须先经过filter,原因之一是有些摄像机输出的音频包每个packet只有320个字节,而AAC编码器每帧需要1024个采样。
初始化Filter:
int initFilters() { char args[512]; int ret; AVFilter *abuffersrc = avfilter_get_by_name("abuffer"); AVFilter *abuffersink = avfilter_get_by_name("abuffersink"); AVFilterInOut *outputs = avfilter_inout_alloc(); AVFilterInOut *inputs = avfilter_inout_alloc(); static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }; static const int64_t out_channel_layouts[] = { AV_CH_LAYOUT_MONO, -1 }; static const int out_sample_rates[] = {decoderContext->sample_rate , -1 }; AVRational time_base = input->time_base; filter_graph = avfilter_graph_alloc(); /* buffer audio source: the decoded frames from the decoder will be inserted here. */ if (!decoderContext->channel_layout) decoderContext->channel_layout = av_get_default_channel_layout(decoderContext->channels); sprintf_s(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x", time_base.num, time_base.den, decoderContext->sample_rate, av_get_sample_fmt_name(decoderContext->sample_fmt), decoderContext->channel_layout); ret = avfilter_graph_create_filter(&buffersrc_ctx, abuffersrc, "in", args, NULL, filter_graph); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n"); return ret; } /* buffer audio sink: to terminate the filter chain. 
*/ ret = avfilter_graph_create_filter(&buffersink_ctx, abuffersink, "out", NULL, NULL, filter_graph); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n"); return ret; } ret = av_opt_set_int_list(buffersink_ctx, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n"); return ret; } ret = av_opt_set_int_list(buffersink_ctx, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n"); return ret; } ret = av_opt_set_int_list(buffersink_ctx, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n"); return ret; } /* Endpoints for the filter graph. */ outputs->name = av_strdup("in"); outputs->filter_ctx = buffersrc_ctx; outputs->pad_idx = 0; outputs->next = NULL; inputs->name = av_strdup("out"); inputs->filter_ctx = buffersink_ctx; inputs->pad_idx = 0; inputs->next = NULL; if ((ret = avfilter_graph_parse_ptr(filter_graph, "anull", &inputs, &outputs, nullptr)) < 0) return ret; if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) return ret; av_buffersink_set_frame_size(buffersink_ctx, 1024); return 0; }
Filter可以简单理解为FIFO(当然实际上不是):输入是解码后的AVFrame,输出是编码的源头。AVFrame经过filter以后就可以编码了。
/*
 * Encode one filtered frame to AAC. Takes ownership of `frame` (it is always
 * freed here). Returns a self-releasing packet whose pts/dts/duration have
 * been rescaled from the encoder time base to the output stream time base,
 * or nullptr on encoder error or when the encoder has not emitted a packet
 * yet (internal delay).
 *
 * Fix vs. previous revision: the return code of avcodec_encode_audio2 was
 * stored in `hr` but never checked, so real encoder errors were silently
 * indistinguishable from normal encoder delay; they are now logged and
 * reported via nullptr.
 */
shared_ptr<AVPacket> encode(AVFrame *frame)
{
    int gotpacket = 0;
    /* The packet frees itself: av_free_packet releases the payload/side data,
       av_freep releases the AVPacket shell. No lambda capture is needed. */
    shared_ptr<AVPacket> packet((AVPacket *)av_malloc(sizeof(AVPacket)),
                                [](AVPacket *p) { av_free_packet(p); av_freep(&p); });
    auto pkt = packet.get();
    av_init_packet(pkt);
    pkt->data = nullptr;
    pkt->size = 0;

    /* The buffersink delivers 1024-sample frames; make the frame's
       descriptive fields match what the encoder was opened with. */
    frame->nb_samples = encoderContext->frame_size;
    frame->format = encoderContext->sample_fmt;
    frame->channel_layout = encoderContext->channel_layout;

    int hr = avcodec_encode_audio2(encoderContext.get(), pkt, frame, &gotpacket);
    av_frame_free(&frame);
    if (hr < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error encoding audio frame\n");
        return nullptr;
    }

    if (gotpacket) {
        /* Rescale timestamps from the encoder time base to the muxer's. */
        if (pkt->pts != AV_NOPTS_VALUE)
            pkt->pts = av_rescale_q(pkt->pts, encoderContext->time_base, output->time_base);
        if (pkt->dts != AV_NOPTS_VALUE)
            pkt->dts = av_rescale_q(pkt->dts, encoderContext->time_base, output->time_base);
        if (pkt->duration > 0)
            pkt->duration = int(av_rescale_q(pkt->duration, encoderContext->time_base, output->time_base));
        return packet;
    }
    return nullptr;
}
实际运用中我们用到了智能指针shared_ptr&lt;AVPacket&gt;,也可以不用,但是要注意内存泄漏问题。如果程序运行在多核上,建议将AVFilterGraph中的thread设置为1。以上代码久经考验,可以放心使用。如果有什么问题,可以联系我 350197870。
时间: 2024-10-10 02:29:08