总述
在前一篇文章中,讲解了如何将OPENH264编解码器集成到WebRTC中,但是OPENH264只能编码baseline的H264视频,而且就编码质量而言,还是X264最好,本文就来讲解一下如何将X264编码器集成到WebRTC中,为了实现解码,同时要用到ffmpeg。总体流程和之前一样,分为重新封装编解码器和注册调用两大步骤,注册调用这一步没有任何不同,主要是重新封装这一步骤有较大区别。
重新封装X264编码功能
首先当然还是要下载X264源码,编译出相应的库以供调用。在Windows下使用MinGW进行编译,再使用pexports工具导出库,最后得到libx264.dll和libx264.lib;把这两个文件连同x264.h和x264_config.h(总共四个文件)放到工程目录下,并在项目属性中进行相应配置。
使用x264进行视频编码的基本流程如下
#include <stdint.h> #include <stdio.h> #include <x264.h> int main( int argc, char **argv ) { int width, height; x264_param_t param; x264_picture_t pic; x264_picture_t pic_out; x264_t *h; int i_frame = 0; int i_frame_size; x264_nal_t *nal; int i_nal; /* Get default params for preset/tuning */ if( x264_param_default_preset( ¶m, "medium", NULL ) < 0 ) goto fail; /* Configure non-default params */ param.i_csp = X264_CSP_I420; param.i_width = width; param.i_height = height; param.b_vfr_input = 0; param.b_repeat_headers = 1; param.b_annexb = 1; /* Apply profile restrictions. */ if( x264_param_apply_profile( ¶m, "high" ) < 0 ) goto fail; if( x264_picture_alloc( &pic, param.i_csp, param.i_width, param.i_height ) < 0 ) goto fail; h = x264_encoder_open( ¶m); if( !h ) goto fail; int luma_size = width * height; int chroma_size = luma_size / 4; /* Encode frames */ for( ;; i_frame++ ) { /* Read input frame */ if( fread( pic.img.plane[0], 1, luma_size, stdin ) != luma_size ) break; if( fread( pic.img.plane[1], 1, chroma_size, stdin ) != chroma_size ) break; if( fread( pic.img.plane[2], 1, chroma_size, stdin ) != chroma_size ) break; pic.i_pts = i_frame; i_frame_size = x264_encoder_encode( h, &nal, &i_nal, &pic, &pic_out ); if( i_frame_size < 0 ) goto fail; else if( i_frame_size ) { if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) ) goto fail; } } /* Flush delayed frames */ while( x264_encoder_delayed_frames( h ) ) { i_frame_size = x264_encoder_encode( h, &nal, &i_nal, NULL, &pic_out ); if( i_frame_size < 0 ) goto fail; else if( i_frame_size ) { if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) ) goto fail; } } x264_encoder_close( h ); x264_picture_clean( &pic ); return 0; }
还是一样,照葫芦画瓢,改写上一篇文章中提到的H264EncoderImpl类
首先是类的定义,去掉了原来的私有成员变量ISVCEncoder* encoder_,加入了以下几项,其他内容不变
x264_picture_t pic; x264_picture_t pic_out; x264_t *encoder_; int i_frame = 0;//frame index x264_nal_t *nal;
相应的,构造函数和析构函数也要改变,这里就不赘述了,重点看InitEncode方法和Encode方法。
InitEncode方法的实现改写如下
int H264EncoderImpl::InitEncode(const VideoCodec* inst, int number_of_cores, size_t max_payload_size) { if (inst == NULL) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } if (inst->maxFramerate < 1) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } // allow zero to represent an unspecified maxBitRate if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } if (inst->width < 1 || inst->height < 1) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } if (number_of_cores < 1) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } int ret_val = Release(); if (ret_val < 0) { return ret_val; } /* Get default params for preset/tuning */ x264_param_t param; ret_val = x264_param_default_preset(¶m, "medium", NULL); if (ret_val != 0) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d", ret_val); x264_encoder_close(encoder_); encoder_ = NULL; return WEBRTC_VIDEO_CODEC_ERROR; } /* Configure non-default params */ param.i_csp = X264_CSP_I420; param.i_width = inst->width; param.i_height = inst->height; param.b_vfr_input = 0; param.b_repeat_headers = 1; param.b_annexb = 0;//这里设置为0,是为了使编码后的NAL统一有4字节的起始码,便于处理,否则会同时有3字节和4字节的起始码,很麻烦 param.i_fps_num = 1; param.i_fps_num = codec_.maxFramerate; param.rc.i_bitrate = codec_.maxBitrate; /* Apply profile restrictions. 
*/ ret_val = x264_param_apply_profile(¶m, "high"); if (ret_val != 0) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d", ret_val); x264_encoder_close(encoder_); encoder_ = NULL; return WEBRTC_VIDEO_CODEC_ERROR; } ret_val = x264_picture_alloc(&pic, param.i_csp, param.i_width, param.i_height); if (ret_val != 0) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d", ret_val); x264_encoder_close(encoder_); encoder_ = NULL; return WEBRTC_VIDEO_CODEC_ERROR; } encoder_ = x264_encoder_open(¶m); if (!encoder_){ WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d", ret_val); x264_encoder_close(encoder_); x264_picture_clean(&pic); encoder_ = NULL; return WEBRTC_VIDEO_CODEC_ERROR; } if (&codec_ != inst) { codec_ = *inst; } if (encoded_image_._buffer != NULL) { delete[] encoded_image_._buffer; } encoded_image_._size = CalcBufferSize(kI420, codec_.width, codec_.height); encoded_image_._buffer = new uint8_t[encoded_image_._size]; encoded_image_._completeFrame = true; inited_ = true; WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::InitEncode(width:%d, height:%d, framerate:%d, start_bitrate:%d, max_bitrate:%d)", inst->width, inst->height, inst->maxFramerate, inst->startBitrate, inst->maxBitrate); return WEBRTC_VIDEO_CODEC_OK; }
Encode方法的实现改写如下
int H264EncoderImpl::Encode(const I420VideoFrame& input_image, const CodecSpecificInfo* codec_specific_info, const std::vector<VideoFrameType>* frame_types) { if (!inited_) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } if (input_image.IsZeroSize()) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } if (encoded_complete_callback_ == NULL) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } VideoFrameType frame_type = kDeltaFrame; // We only support one stream at the moment. if (frame_types && frame_types->size() > 0) { frame_type = (*frame_types)[0]; } bool send_keyframe = (frame_type == kKeyFrame); if (send_keyframe) { pic.b_keyframe = TRUE; WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::EncodeKeyFrame(width:%d, height:%d)", input_image.width(), input_image.height()); } // Check for change in frame size. if (input_image.width() != codec_.width || input_image.height() != codec_.height) { int ret = UpdateCodecFrameSize(input_image); if (ret < 0) { return ret; } } /* Read input frame */ pic.img.plane[0] = const_cast<uint8_t*>(input_image.buffer(kYPlane)); pic.img.plane[1] = const_cast<uint8_t*>(input_image.buffer(kUPlane)); pic.img.plane[2] = const_cast<uint8_t*>(input_image.buffer(kVPlane)); pic.i_pts = i_frame; int i_nal = 0; int i_frame_size = x264_encoder_encode(encoder_, &nal, &i_nal, &pic, &pic_out); if (i_frame_size < 0) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::Encode() fails to encode %d", i_frame_size); x264_encoder_close(encoder_); x264_picture_clean(&pic); encoder_ = NULL; return WEBRTC_VIDEO_CODEC_ERROR; } RTPFragmentationHeader frag_info; if (i_frame_size) { if (i_nal == 0) { return WEBRTC_VIDEO_CODEC_OK; } frag_info.VerifyAndAllocateFragmentationHeader(i_nal); encoded_image_._length = 0; uint32_t totalNaluIndex = 0; for (int nal_index = 0; nal_index < i_nal; nal_index++) { uint32_t currentNaluSize = 0; currentNaluSize = nal[nal_index].i_payload - 4; 
//x264_encoder_encode编码得到的nal单元是已经带有起始码的,此外,这里直接使用nal[index]即可,不必再使用x264_nal_encode函数 memcpy(encoded_image_._buffer + encoded_image_._length, nal[nal_index].p_payload + 4, currentNaluSize);//encoded_image_中存有的是去掉起始码的数据 encoded_image_._length += currentNaluSize; WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::Encode() nal_type %d, length:%d", nal[nal_index].i_type, encoded_image_._length); frag_info.fragmentationOffset[totalNaluIndex] = encoded_image_._length - currentNaluSize; frag_info.fragmentationLength[totalNaluIndex] = currentNaluSize; frag_info.fragmentationPlType[totalNaluIndex] = nal[nal_index].i_type; frag_info.fragmentationTimeDiff[totalNaluIndex] = 0; totalNaluIndex++; } } i_frame++; if (encoded_image_._length > 0) { encoded_image_._timeStamp = input_image.timestamp(); encoded_image_.capture_time_ms_ = input_image.render_time_ms(); encoded_image_._encodedHeight = codec_.height; encoded_image_._encodedWidth = codec_.width; encoded_image_._frameType = frame_type; // call back encoded_complete_callback_->Encoded(encoded_image_, NULL, &frag_info); } return WEBRTC_VIDEO_CODEC_OK; }
其他方法的实现均没有改变。
至此,X264编码器重新封装完毕,还是比较好理解的。
重新封装ffmpeg解码功能
首先还是一样,获得ffmpeg的头文件和库文件,加入工程中并进行相应设置,这里只需使用avcodec avformat avutil swscale四个库,头文件也可以做相应的删减。
ffmpeg解码的基本流程如下,实际集成之后是从WebRTC的EncodedImage& input_image中获得待解码数据的,所以不能使用常见的基于文件的解码流程
// Basic FFmpeg flow for decoding one H.264 buffer that is already in memory.
AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
AVCodecContext *codecCtx = avcodec_alloc_context3(codec);
avcodec_open2(codecCtx, codec, NULL);

// Placeholders: point videoData at the encoded bytes and set len to their
// size before the copy below.
char *videoData = NULL;
int len = 0;

AVFrame *frame = av_frame_alloc();
AVPacket packet;
av_new_packet(&packet, len);
memcpy(packet.data, videoData, len);

int ret, got_picture;
ret = avcodec_decode_video2(codecCtx, frame, &got_picture, &packet);
if (ret > 0) {
  if (got_picture) {
    // Continue with the next processing step on the decoded frame.
  }
}
// Release the payload allocated by av_new_packet().
av_free_packet(&packet);
相应的,对H264DecoderImpl类的定义和各方法的实现要进行改写。
首先是类的定义,去掉了ISVCDecoder* decoder_,加入了以下私有成员变量
AVCodecContext *pCodecCtx;           // Decoder context allocated in InitDecode().
AVCodec *pCodec;                     // H.264 decoder returned by avcodec_find_decoder().
AVFrame *pFrame, *pFrameYUV;         // Decoded frame and its YUV420P conversion target.
AVPacket *packet;                    // Packet carrying the encoded input for the decoder.
struct SwsContext *img_convert_ctx;  // swscale context used to convert decoded frames.
uint8_t *decode_buffer;              // Holds the first SPS, PPS and IDR frame received so that initial decoding can start.
uint8_t *out_buffer;                 // Backing storage for pFrameYUV.
int framecnt = 0;                    // Counts the Decode() calls handled during start-up buffering.
int encoded_length = 0;              // Number of bytes currently stored in decode_buffer.
构造函数和析构函数的改写省略不表,重点看一下InitDecode方法和Decode方法
InitDecode方法改写如下
int H264DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) { if (inst == NULL) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } int ret_val = Release(); if (ret_val < 0) { return ret_val; } if (&codec_ != inst) { // Save VideoCodec instance for later; mainly for duplicating the decoder. codec_ = *inst; } pCodec = avcodec_find_decoder(AV_CODEC_ID_H264); pCodecCtx = avcodec_alloc_context3(pCodec); pCodecCtx->pix_fmt = PIX_FMT_YUV420P; pCodecCtx->width = codec_.width; pCodecCtx->height = codec_.height; //pCodecCtx->bit_rate = codec_.targetBitrate*1000; pCodecCtx->time_base.num = 1; pCodecCtx->time_base.den = codec_.maxFramerate; if (pCodec == NULL){ WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::InitDecode, Codec not found."); return WEBRTC_VIDEO_CODEC_ERROR; } if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0){ WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::InitDecode, Could not open codec."); return WEBRTC_VIDEO_CODEC_ERROR; } inited_ = true; // Always start with a complete key frame. key_frame_required_ = true; WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::InitDecode(width:%d, height:%d, framerate:%d, start_bitrate:%d, max_bitrate:%d)", inst->width, inst->height, inst->maxFramerate, inst->startBitrate, inst->maxBitrate); return WEBRTC_VIDEO_CODEC_OK; }
Decode方法的实现改写如下
int H264DecoderImpl::Decode(const EncodedImage& input_image, bool missing_frames, const RTPFragmentationHeader* fragmentation, const CodecSpecificInfo* codec_specific_info, int64_t /*render_time_ms*/) { if (!inited_) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::Decode, decoder is not initialized"); return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } if (decode_complete_callback_ == NULL) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::Decode, decode complete call back is not set"); return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } if (input_image._buffer == NULL) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::Decode, null buffer"); return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } if (!codec_specific_info) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::Decode, no codec info"); return WEBRTC_VIDEO_CODEC_ERROR; } if (codec_specific_info->codecType != kVideoCodecH264) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264EncoderImpl::Decode, non h264 codec %d", codec_specific_info->codecType); return WEBRTC_VIDEO_CODEC_ERROR; } WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::Decode(frame_type:%d, length:%d", input_image._frameType, input_image._length); if (framecnt < 2) {//存储最开始的SPS PPS 和 IDR帧以便进行初始的解码 memcpy(decode_buffer + encoded_length, input_image._buffer, input_image._length); encoded_length += input_image._length; framecnt++; } else { pFrame = av_frame_alloc(); pFrameYUV = av_frame_alloc(); out_buffer = (uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height)); avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height); img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL); if 
(framecnt == 2) { packet = (AVPacket *)av_malloc(sizeof(AVPacket)); av_new_packet(packet, encoded_length); memcpy(packet->data, decode_buffer, encoded_length); av_free(decode_buffer); framecnt++; printf("\n\nLoading"); } else { packet = (AVPacket *)av_malloc(sizeof(AVPacket)); av_new_packet(packet, input_image._length); memcpy(packet->data, input_image._buffer, input_image._length); } int got_picture = 0; int ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet); if (ret < 0){ WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1, "H264DecoderImpl::Decode, Decode Error."); return WEBRTC_VIDEO_CODEC_ERROR; } if (got_picture){ sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, pCodecCtx->height, pFrameYUV->data, pFrameYUV->linesize); int size_y = pFrameYUV->linesize[0] * pCodecCtx->height; int size_u = pFrameYUV->linesize[1] * pCodecCtx->height / 2; int size_v = pFrameYUV->linesize[2] * pCodecCtx->height / 2; decoded_image_.CreateFrame(size_y, static_cast<uint8_t*>(pFrameYUV->data[0]), size_u, static_cast<uint8_t*>(pFrameYUV->data[1]), size_v, static_cast<uint8_t*>(pFrameYUV->data[2]), pCodecCtx->width, pCodecCtx->height, pFrameYUV->linesize[0], pFrameYUV->linesize[1], pFrameYUV->linesize[2]); decoded_image_.set_timestamp(input_image._timeStamp); decode_complete_callback_->Decoded(decoded_image_); return WEBRTC_VIDEO_CODEC_OK; } else printf("."); av_free_packet(packet); } return WEBRTC_VIDEO_CODEC_OK; }
其他方法的实现保持不变,至此ffmpeg解码功能的重新封装也完成了。
从最后实现的效果来看,X264的视频质量的确是最好的,但是播放端的解码延时比较高,暂时还不清楚原因,希望了解的朋友指教。
本项目源代码
版权声明:本文为博主原创文章,未经博主允许不得转载。