FAAC源码分析之faacEncEncode

FAAC编码代码流程图

通用的AAC编码系统框图(偷来的图)

对比两张图可以发现,FAAC的编码流程与通用的AAC编码流程大致一致,主要包括心理声学模型处理和量化处理两部分,此外还包含一些优化处理过程。下面是实现的源码及注释:

/*
 * Encode one frame of interleaved input samples and write the resulting
 * AAC frame to outputBuffer.
 *
 * Parameters:
 *   hEncoder     - encoder handle; its configuration and per-channel state
 *                  are read and updated by this call.
 *   inputBuffer  - interleaved input samples. Declared as int32_t* but
 *                  reinterpreted as short, int32_t or float according to
 *                  hEncoder->config.inputFormat.
 *   samplesInput - total number of samples in inputBuffer, counted over all
 *                  channels. 0 requests a flush frame. Must not exceed
 *                  FRAME_LEN samples per channel.
 *   outputBuffer - destination buffer handed to OpenBitStream.
 *   bufferSize   - handed to OpenBitStream together with outputBuffer.
 *
 * Returns:
 *   The value returned by CloseBitStream (number of bytes written) on a
 *   normally encoded frame;
 *   0 while the first three calls are still filling the sample buffers, and
 *   0 once more than four flush frames have been requested;
 *   -1 if the input is invalid (NULL inputBuffer with samplesInput != 0,
 *   more than FRAME_LEN samples per channel, unknown input format) or if a
 *   sample buffer could not be allocated.
 */
int FAACAPI faacEncEncode(faacEncHandle hEncoder,
	int32_t *inputBuffer,
	unsigned int samplesInput,
	unsigned char *outputBuffer,
	unsigned int bufferSize
	)
{
	unsigned int channel, i;
	int sb, frameBytes;
	unsigned int offset;
	BitStream *bitStream; /* bitstream used for writing the frame to */
	TnsInfo *tnsInfo_for_LTP;
	TnsInfo *tnsDecInfo;
#ifdef DRM
	int desbits, diff;
	double fix;
#endif

	/* local copies of parameters */
	ChannelInfo *channelInfo = hEncoder->channelInfo;
	CoderInfo *coderInfo = hEncoder->coderInfo;
	unsigned int numChannels = hEncoder->numChannels;
	unsigned int sampleRate = hEncoder->sampleRate;
	unsigned int aacObjectType = hEncoder->config.aacObjectType;
	unsigned int mpegVersion = hEncoder->config.mpegVersion;
	unsigned int useLfe = hEncoder->config.useLfe;
	unsigned int useTns = hEncoder->config.useTns;
	unsigned int allowMidside = hEncoder->config.allowMidside;
	unsigned int bandWidth = hEncoder->config.bandWidth;
	unsigned int shortctl = hEncoder->config.shortctl;

	/*
	 * Validate the input before any encoder state is modified.
	 * Each per-channel sample buffer holds FRAME_LEN doubles, so accepting
	 * more than FRAME_LEN samples per channel would write past its end.
	 * The division form avoids overflow of FRAME_LEN * numChannels.
	 */
	if (samplesInput != 0)
	{
		if (inputBuffer == NULL)
			return -1;
		if (numChannels != 0 && samplesInput / numChannels > FRAME_LEN)
			return -1;
	}

	/* Increase frame number */
	hEncoder->frameNum++;

	if (samplesInput == 0)
		hEncoder->flushFrame++;

	/* After 4 flush frames all samples have been encoded,
	return 0 bytes written */
	if (hEncoder->flushFrame > 4)
		return 0;

	/* Determine the channel configuration */
	GetChannelInfo(channelInfo, numChannels, useLfe);

	/* Update current sample buffers */
	for (channel = 0; channel < numChannels; channel++)
	{
		double *tmp;

		/* ltpTimeBuff = [current frame | next frame], captured before the
		   buffers are rotated below */
		if (hEncoder->sampleBuff[channel]) {
			for(i = 0; i < FRAME_LEN; i++) {
				hEncoder->ltpTimeBuff[channel][i] = hEncoder->sampleBuff[channel][i];
			}
		}
		if (hEncoder->nextSampleBuff[channel]) {
			for(i = 0; i < FRAME_LEN; i++) {
				hEncoder->ltpTimeBuff[channel][FRAME_LEN + i] =
					hEncoder->nextSampleBuff[channel][i];
			}
		}

		if (!hEncoder->sampleBuff[channel])
		{
			hEncoder->sampleBuff[channel] = (double*)AllocMemory(FRAME_LEN*sizeof(double));
			/* the buffer is written through below (as next3SampleBuff) */
			if (!hEncoder->sampleBuff[channel])
				return -1;
		}

		/* Rotate the four frame buffers: the oldest one is recycled to
		   receive the new input */
		tmp = hEncoder->sampleBuff[channel];

		hEncoder->sampleBuff[channel]		= hEncoder->nextSampleBuff[channel];
		hEncoder->nextSampleBuff[channel]	= hEncoder->next2SampleBuff[channel];
		hEncoder->next2SampleBuff[channel]	= hEncoder->next3SampleBuff[channel];
		hEncoder->next3SampleBuff[channel]	= tmp;

		if (samplesInput == 0)
		{
			/* start flushing*/
			for (i = 0; i < FRAME_LEN; i++)
				hEncoder->next3SampleBuff[channel][i] = 0.0;
		}
		else
		{
			/* guaranteed <= FRAME_LEN by the validation at function entry */
			unsigned int samples_per_channel = samplesInput/numChannels;

			/* handle the various input formats and channel remapping */
			switch( hEncoder->config.inputFormat )
			{

			case FAAC_INPUT_16BIT:
				{
					short *input_channel = (short*)inputBuffer + hEncoder->config.channel_map[channel];

					for (i = 0; i < samples_per_channel; i++)
					{
						hEncoder->next3SampleBuff[channel][i] = (double)*input_channel;
						input_channel += numChannels;
					}
				}
				break;

			case FAAC_INPUT_32BIT:
				{
					int32_t *input_channel = (int32_t*)inputBuffer + hEncoder->config.channel_map[channel];

					/* 32-bit samples are scaled down by 256 */
					for (i = 0; i < samples_per_channel; i++)
					{
						hEncoder->next3SampleBuff[channel][i] = (1.0/256) * (double)*input_channel;
						input_channel += numChannels;
					}
				}
				break;

			case FAAC_INPUT_FLOAT:
				{
					float *input_channel = (float*)inputBuffer + hEncoder->config.channel_map[channel];

					for (i = 0; i < samples_per_channel; i++)
					{
						hEncoder->next3SampleBuff[channel][i] = (double)*input_channel;
						input_channel += numChannels;
					}
				}
				break;

			default:
				return -1; /* invalid input format */
			}

			/* zero-pad a short frame up to FRAME_LEN */
			for (i = samples_per_channel; i < FRAME_LEN; i++)
				hEncoder->next3SampleBuff[channel][i] = 0.0;
		}

		/* Psychoacoustics */
		/* Update buffers and run FFT on new samples */
		/* LFE psychoacoustic can run without it */
		if (!channelInfo[channel].lfe || channelInfo[channel].cpe)
		{
			/* Update the psychoacoustic model buffers and compute the
			   energy of the current frame */
			hEncoder->psymodel->PsyBufferUpdate(
				&hEncoder->fft_tables,
				&hEncoder->gpsyInfo,
				&hEncoder->psyInfo[channel],
				hEncoder->next3SampleBuff[channel],
				bandWidth,
				hEncoder->srInfo->cb_width_short,
				hEncoder->srInfo->num_cb_short);
		}
	}

	if (hEncoder->frameNum <= 3) /* Still filling up the buffers */
		return 0;

	/* Psychoacoustics */
	/* Internally detects transient signals to decide between long and
	   short blocks */
	hEncoder->psymodel->PsyCalculate(channelInfo, &hEncoder->gpsyInfo, hEncoder->psyInfo,
		hEncoder->srInfo->cb_width_long, hEncoder->srInfo->num_cb_long,
		hEncoder->srInfo->cb_width_short,
		hEncoder->srInfo->num_cb_short, numChannels);

	/* Long/short block switching */
	hEncoder->psymodel->BlockSwitch(coderInfo, hEncoder->psyInfo, numChannels);

	/* force block type */
	if (shortctl == SHORTCTL_NOSHORT)
	{
		for (channel = 0; channel < numChannels; channel++)
		{
			coderInfo[channel].block_type = ONLY_LONG_WINDOW;
		}
	}
	if (shortctl == SHORTCTL_NOLONG)
	{
		for (channel = 0; channel < numChannels; channel++)
		{
			coderInfo[channel].block_type = ONLY_SHORT_WINDOW;
		}
	}

	/* AAC Filterbank, MDCT with overlap and add */
	for (channel = 0; channel < numChannels; channel++) {
		int k;

		FilterBank(hEncoder,
			&coderInfo[channel],
			hEncoder->sampleBuff[channel],
			hEncoder->freqBuff[channel],
			hEncoder->overlapBuff[channel],
			MOVERLAPPED);

		/* Short-window frames are filtered as 8 consecutive spectra of
		   BLOCK_LEN_SHORT coefficients each */
		if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
			for (k = 0; k < 8; k++) {
				specFilter(hEncoder->freqBuff[channel]+k*BLOCK_LEN_SHORT,
					sampleRate, bandWidth, BLOCK_LEN_SHORT);
			}
		} else {
			specFilter(hEncoder->freqBuff[channel], sampleRate,
				bandWidth, BLOCK_LEN_LONG);
		}
	}

	/* TMP: Build sfb offset table and other stuff */
	for (channel = 0; channel < numChannels; channel++) {
		channelInfo[channel].msInfo.is_present = 0;

		if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
			coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_short;
			coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_short;

			/* one group containing all 8 short windows */
			coderInfo[channel].num_window_groups = 1;
			coderInfo[channel].window_group_length[0] = 8;
			coderInfo[channel].window_group_length[1] = 0;
			coderInfo[channel].window_group_length[2] = 0;
			coderInfo[channel].window_group_length[3] = 0;
			coderInfo[channel].window_group_length[4] = 0;
			coderInfo[channel].window_group_length[5] = 0;
			coderInfo[channel].window_group_length[6] = 0;
			coderInfo[channel].window_group_length[7] = 0;

			/* sfb_offset[] is the running sum of the band widths, with one
			   extra trailing entry holding the total */
			offset = 0;
			for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) {
				coderInfo[channel].sfb_offset[sb] = offset;
				offset += hEncoder->srInfo->cb_width_short[sb];
			}
			coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset;
		} else {
			coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_long;
			coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_long;

			coderInfo[channel].num_window_groups = 1;
			coderInfo[channel].window_group_length[0] = 1;

			offset = 0;
			for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) {
				coderInfo[channel].sfb_offset[sb] = offset;
				offset += hEncoder->srInfo->cb_width_long[sb];
			}
			coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset;
		}
	}

	/* Perform TNS analysis and filtering */
	for (channel = 0; channel < numChannels; channel++) {
		if ((!channelInfo[channel].lfe) && (useTns)) {
			TnsEncode(&(coderInfo[channel].tnsInfo),
				coderInfo[channel].max_sfb,
				coderInfo[channel].max_sfb,
				(WINDOW_TYPE)coderInfo[channel].block_type,
				coderInfo[channel].sfb_offset,
				hEncoder->freqBuff[channel]);
		} else {
			coderInfo[channel].tnsInfo.tnsDataPresent = 0;      /* TNS not used for LFE */
		}
	}

	/* Long-term prediction: only for present, non-LFE channels that are not
	   coded with short windows, in MPEG-4 with the LTP object type */
	for(channel = 0; channel < numChannels; channel++)
	{
		if((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns))
			tnsInfo_for_LTP = &(coderInfo[channel].tnsInfo);
		else
			tnsInfo_for_LTP = NULL;

		if(channelInfo[channel].present && (!channelInfo[channel].lfe) &&
			(coderInfo[channel].block_type != ONLY_SHORT_WINDOW) &&
			(mpegVersion == MPEG4) && (aacObjectType == LTP))
		{
			LtpEncode(hEncoder,
				&coderInfo[channel],
				&(coderInfo[channel].ltpInfo),
				tnsInfo_for_LTP,
				hEncoder->freqBuff[channel],
				hEncoder->ltpTimeBuff[channel]);
		} else {
			coderInfo[channel].ltpInfo.global_pred_flag = 0;
		}
	}

	/* Prediction for the MAIN object type (non-LFE channels only) */
	for(channel = 0; channel < numChannels; channel++)
	{
		if ((aacObjectType == MAIN) && (!channelInfo[channel].lfe)) {
			int numPredBands = min(coderInfo[channel].max_pred_sfb, coderInfo[channel].nr_of_sfb);
			PredCalcPrediction(hEncoder->freqBuff[channel],
				coderInfo[channel].requantFreq,
				coderInfo[channel].block_type,
				numPredBands,
				(coderInfo[channel].block_type==ONLY_SHORT_WINDOW)?
				hEncoder->srInfo->cb_width_short:hEncoder->srInfo->cb_width_long,
				coderInfo,
				channelInfo,
				channel);
		} else {
			coderInfo[channel].pred_global_flag = 0;
		}
	}

	for (channel = 0; channel < numChannels; channel++) {
		if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
			SortForGrouping(&coderInfo[channel],
				&hEncoder->psyInfo[channel],
				&channelInfo[channel],
				hEncoder->srInfo->cb_width_short,
				hEncoder->freqBuff[channel]);
		}
		CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]);

		/* reduce LFE bandwidth */
		if (!channelInfo[channel].cpe && channelInfo[channel].lfe)
		{
			coderInfo[channel].nr_of_sfb = coderInfo[channel].max_sfb = 3;
		}
	}

	MSEncode(coderInfo, channelInfo, hEncoder->freqBuff, numChannels, allowMidside);

	/* Recompute the average energy after MSEncode has been applied to
	   freqBuff */
	for (channel = 0; channel < numChannels; channel++)
	{
		CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]);
	}

#ifdef DRM
	/* loop the quantization until the desired bit-rate is reached */
	diff = 1; /* to enter while loop */
	hEncoder->aacquantCfg.quality = 120; /* init quality setting */
	while (diff > 0) { /* if too many bits, do it again */
#endif
		/* Quantize and code the signal */
		for (channel = 0; channel < numChannels; channel++) {
			if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
				AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel],
					&channelInfo[channel], hEncoder->srInfo->cb_width_short,
					hEncoder->srInfo->num_cb_short, hEncoder->freqBuff[channel],
					&(hEncoder->aacquantCfg));
			} else {
				AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel],
					&channelInfo[channel], hEncoder->srInfo->cb_width_long,
					hEncoder->srInfo->num_cb_long, hEncoder->freqBuff[channel],
					&(hEncoder->aacquantCfg));
			}
		}

#ifdef DRM
		/* Write the AAC bitstream */
		bitStream = OpenBitStream(bufferSize, outputBuffer);
		WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels);

		/* Close the bitstream and return the number of bytes written */
		frameBytes = CloseBitStream(bitStream);

		/* now calculate desired bits and compare with actual encoded bits */
		desbits = (int) ((double) numChannels * (hEncoder->config.bitRate * FRAME_LEN)
			/ hEncoder->sampleRate);

		diff = ((frameBytes - 1 /* CRC */) * 8) - desbits;

		/* do linear correction according to relative difference */
		/* NOTE(review): the divisor is 0 when frameBytes == 1 - confirm
		   CloseBitStream can never return that here */
		fix = (double) desbits / ((frameBytes - 1 /* CRC */) * 8);

		/* speed up convergence. A value of 0.92 gives approx up to 10 iterations */
		if (fix > 0.92)
			fix = 0.92;

		hEncoder->aacquantCfg.quality *= fix;

		/* quality should not go lower than 1, set diff to exit loop */
		if (hEncoder->aacquantCfg.quality <= 1)
			diff = -1;
	}
#endif

	/* fix max_sfb in CPE mode: both channels of a pair use the larger of
	   the two max_sfb values */
	for (channel = 0; channel < numChannels; channel++)
	{
		if (channelInfo[channel].present
			&& (channelInfo[channel].cpe)
			&& (channelInfo[channel].ch_is_left))
		{
			CoderInfo *cil, *cir;

			cil = &coderInfo[channel];
			cir = &coderInfo[channelInfo[channel].paired_ch];

			cil->max_sfb = cir->max_sfb = max(cil->max_sfb, cir->max_sfb);
			cil->nr_of_sfb = cir->nr_of_sfb = cil->max_sfb;
		}
	}

	MSReconstruct(coderInfo, channelInfo, numChannels);

	for (channel = 0; channel < numChannels; channel++)
	{
		/* If short window, reconstruction not needed for prediction */
		if ((coderInfo[channel].block_type == ONLY_SHORT_WINDOW)) {
			int sind;
			for (sind = 0; sind < BLOCK_LEN_LONG; sind++) {
				coderInfo[channel].requantFreq[sind] = 0.0;
			}
		} else {

			if((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns))
				tnsDecInfo = &(coderInfo[channel].tnsInfo);
			else
				tnsDecInfo = NULL;

			if ((!channelInfo[channel].lfe) && (aacObjectType == LTP)) {  /* no reconstruction needed for LFE channel*/

				LtpReconstruct(&coderInfo[channel], &(coderInfo[channel].ltpInfo),
					coderInfo[channel].requantFreq);

				if(tnsDecInfo != NULL)
					TnsDecodeFilterOnly(&(coderInfo[channel].tnsInfo), coderInfo[channel].nr_of_sfb,
					coderInfo[channel].max_sfb, (WINDOW_TYPE)coderInfo[channel].block_type,
					coderInfo[channel].sfb_offset, coderInfo[channel].requantFreq);

				/* Inverse filterbank of the requantized spectrum into the
				   LTP time buffer, then update the LTP state with it */
				IFilterBank(hEncoder, &coderInfo[channel],
					coderInfo[channel].requantFreq,
					coderInfo[channel].ltpInfo.time_buffer,
					coderInfo[channel].ltpInfo.ltp_overlap_buffer,
					MOVERLAPPED);

				LtpUpdate(&(coderInfo[channel].ltpInfo),
					coderInfo[channel].ltpInfo.time_buffer,
					coderInfo[channel].ltpInfo.ltp_overlap_buffer,
					BLOCK_LEN_LONG);
			}
		}
	}

#ifndef DRM
	/* Write the AAC bitstream */
	bitStream = OpenBitStream(bufferSize, outputBuffer);

	WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels);

	/* Close the bitstream and return the number of bytes written */
	frameBytes = CloseBitStream(bitStream);

	/* Adjust quality to get correct average bitrate */
	if (hEncoder->config.bitRate)
	{
		double fix;
		int desbits = numChannels * (hEncoder->config.bitRate * FRAME_LEN)
			/ hEncoder->sampleRate;
		int diff = (frameBytes * 8) - desbits;

		/* bitDiff accumulates the deviation from the target over all frames;
		   the correction is 1% of the relative deviation, clamped to +/-0.2 */
		hEncoder->bitDiff += diff;
		fix = (double)hEncoder->bitDiff / desbits;
		fix *= 0.01;
		fix = max(fix, -0.2);
		fix = min(fix, 0.2);

		/* only correct when this frame deviates in the same direction as
		   the accumulated error */
		if (((diff > 0) && (fix > 0.0)) || ((diff < 0) && (fix < 0.0)))
		{
			hEncoder->aacquantCfg.quality *= (1.0 - fix);
			if (hEncoder->aacquantCfg.quality > 300)
				hEncoder->aacquantCfg.quality = 300;
			if (hEncoder->aacquantCfg.quality < 50)
				hEncoder->aacquantCfg.quality = 50;
		}
	}
#endif

	return frameBytes;
}
时间: 2024-11-08 20:30:21

FAAC源码分析之faacEncEncode的相关文章

faac源码分析之解码参数配置

FAAC定义了一个结构体用来定义解码器的工作解码参数,该结构体的定义如下所示: typedef struct faacEncConfiguration { /* config version */ int version; /* library version */ char *name; /* copyright string */ char *copyright; /* MPEG version, 2 or 4 */ unsigned int mpegVersion; /* AAC obje

faac源码分析之faacEncOpen

faacEncOpen的代码流程图如下所示: 主要包括初始化解码器句柄以及使用的默认参数,最后的是各个功能模块的初始化,主要是分配内存,初始化一些系数表. faacEncHandle FAACAPI faacEncOpen(unsigned long sampleRate, unsigned int numChannels, unsigned long *inputSamples, unsigned long *maxOutputBytes) { unsigned int channel; fa

TeamTalk源码分析之login_server

login_server是TeamTalk的登录服务器,负责分配一个负载较小的MsgServer给客户端使用,按照新版TeamTalk完整部署教程来配置的话,login_server的服务端口就是8080,客户端登录服务器地址配置如下(这里是win版本客户端): 1.login_server启动流程 login_server的启动是从login_server.cpp中的main函数开始的,login_server.cpp所在工程路径为server\src\login_server.下表是logi

Android触摸屏事件派发机制详解与源码分析二(ViewGroup篇)

1 背景 还记得前一篇<Android触摸屏事件派发机制详解与源码分析一(View篇)>中关于透过源码继续进阶实例验证模块中存在的点击Button却触发了LinearLayout的事件疑惑吗?当时说了,在那一篇咱们只讨论View的触摸事件派发机制,这个疑惑留在了这一篇解释,也就是ViewGroup的事件派发机制. PS:阅读本篇前建议先查看前一篇<Android触摸屏事件派发机制详解与源码分析一(View篇)>,这一篇承接上一篇. 关于View与ViewGroup的区别在前一篇的A

HashMap与TreeMap源码分析

1. 引言     在红黑树--算法导论(15)中学习了红黑树的原理.本来打算自己来试着实现一下,然而在看了JDK(1.8.0)TreeMap的源码后恍然发现原来它就是利用红黑树实现的(很惭愧学了Java这么久,也写过一些小项目,也使用过TreeMap无数次,但到现在才明白它的实现原理).因此本着"不要重复造轮子"的思想,就用这篇博客来记录分析TreeMap源码的过程,也顺便瞅一瞅HashMap. 2. 继承结构 (1) 继承结构 下面是HashMap与TreeMap的继承结构: pu

Linux内核源码分析--内核启动之(5)Image内核启动(rest_init函数)(Linux-3.0 ARMv7)【转】

原文地址:Linux内核源码分析--内核启动之(5)Image内核启动(rest_init函数)(Linux-3.0 ARMv7) 作者:tekkamanninja 转自:http://blog.chinaunix.net/uid-25909619-id-4938395.html 前面粗略分析start_kernel函数,此函数中基本上是对内存管理和各子系统的数据结构初始化.在内核初始化函数start_kernel执行到最后,就是调用rest_init函数,这个函数的主要使命就是创建并启动内核线

Spark的Master和Worker集群启动的源码分析

基于spark1.3.1的源码进行分析 spark master启动源码分析 1.在start-master.sh调用master的main方法,main方法调用 def main(argStrings: Array[String]) { SignalLogger.register(log) val conf = new SparkConf val args = new MasterArguments(argStrings, conf) val (actorSystem, _, _, _) =

Solr4.8.0源码分析(22)之 SolrCloud的Recovery策略(三)

Solr4.8.0源码分析(22)之 SolrCloud的Recovery策略(三) 本文是SolrCloud的Recovery策略系列的第三篇文章,前面两篇主要介绍了Recovery的总体流程,以及PeerSync策略.本文以及后续的文章将重点介绍Replication策略.Replication策略不但可以在SolrCloud中起到leader到replica的数据同步,也可以在用多个单独的Solr来实现主从同步.本文先介绍在SolrCloud的leader到replica的数据同步,下一篇

zg手册 之 python2.7.7源码分析(4)-- pyc字节码文件

什么是字节码 python解释器在执行python脚本文件时,对文件中的python源代码进行编译,编译的结果就是byte code(字节码) python虚拟机执行编译好的字节码,完成程序的运行 python会为导入的模块创建字节码文件 字节码文件的创建过程 当a.py依赖b.py时,如在a.py中import b python先检查是否有b.pyc文件(字节码文件),如果有,并且修改时间比b.py晚,就直接调用b.pyc 否则编译b.py生成b.pyc,然后加载新生成的字节码文件 字节码对象