特征提取函数:
int _sift_features( IplImage* img, struct feature** feat, int intvls, double sigma, double contr_thr, int curv_thr, int img_dbl, int descr_width, int descr_hist_bins ) { IplImage* init_img; IplImage*** gauss_pyr, *** dog_pyr; CvMemStorage* storage; CvSeq* features; int octvs, i, n = 0; /* check arguments */ if( ! img ) fatal_error( "NULL pointer error, %s, line %d", __FILE__, __LINE__ ); if( ! feat ) fatal_error( "NULL pointer error, %s, line %d", __FILE__, __LINE__ ); /* build scale space pyramid; smallest dimension of top level is ~4 pixels */ init_img = create_init_img( img, img_dbl, sigma );//创建初始图像,灰度,每位32F(单精度浮点)来存储。 octvs = log( MIN( init_img->width, init_img->height ) ) / log(2) - 2; //从原始图像构建金字塔的层数,-2 使得最小的一层图像有4个像素,而不是1个像素 gauss_pyr = build_gauss_pyr( init_img, octvs, intvls, sigma );//创建高斯金字塔,octvs层数(0层是原图,向上每层降采样一次),intvls每层中尺寸相同但是模糊程度不同,sigma模糊的初始参数,intvals+3 dog_pyr = build_dog_pyr( gauss_pyr, octvs, intvls );//差分金字塔,层数不变,每层中用相邻图像相减,故每层图像数intvls+2 storage = cvCreateMemStorage( 0 ); features = scale_space_extrema( dog_pyr, octvs, intvls, contr_thr, curv_thr, storage );//检测极值点,在同一层中的图片,的一个像素点,如果是相邻另个模糊度图像对应3x3window中的max值(像素>0时),或者为min值(像素<0时),并且经过插值获得亚像素极值点的坐标,测试对比度,边缘影响,最后将该像素对应在原图中的坐标记录到feat中,将所有满足条件的像素的feat串联到featuresSeq中。 calc_feature_scales( features, sigma, intvls ); if( img_dbl ) adjust_for_img_dbl( features ); calc_feature_oris( features, gauss_pyr ); compute_descriptors( features, gauss_pyr, descr_width, descr_hist_bins ); /* sort features by decreasing scale and move from CvSeq to array */ cvSeqSort( features, (CvCmpFunc)feature_cmp, NULL ); n = features->total; *feat = calloc( n, sizeof(struct feature) ); *feat = cvCvtSeqToArray( features, *feat, CV_WHOLE_SEQ ); for( i = 0; i < n; i++ ) { free( (*feat)[i].feature_data ); (*feat)[i].feature_data = NULL; } cvReleaseMemStorage( &storage ); cvReleaseImage( &init_img ); release_pyr( &gauss_pyr, octvs, intvls + 3 ); release_pyr( &dog_pyr, octvs, intvls + 2 ); return n; }
通过计算特征向量的主曲率半径来判断特征是否是边缘which will导致不稳定,即去除边缘响应:
static int is_too_edge_like( IplImage* dog_img, int r, int c, int curv_thr ) { double d, dxx, dyy, dxy, tr, det; /* principal curvatures are computed using the trace and det of Hessian */ d = pixval32f(dog_img, r, c); dxx = pixval32f( dog_img, r, c+1 ) + pixval32f( dog_img, r, c-1 ) - 2 * d; dyy = pixval32f( dog_img, r+1, c ) + pixval32f( dog_img, r-1, c ) - 2 * d; dxy = ( pixval32f(dog_img, r+1, c+1) - pixval32f(dog_img, r+1, c-1) - pixval32f(dog_img, r-1, c+1) + pixval32f(dog_img, r-1, c-1) ) / 4.0; tr = dxx + dyy; det = dxx * dyy - dxy * dxy; /* negative determinant -> curvatures have different signs; reject feature */ if( det <= 0 ) return 1; if( tr * tr / det < ( curv_thr + 1.0 )*( curv_thr + 1.0 ) / curv_thr ) return 0; return 1; }
calc_features_scales函数很简单,就是填充所有特征点对应的scl和scl_octv变量:
static void calc_feature_scales( CvSeq* features, double sigma, int intvls ) { struct feature* feat; struct detection_data* ddata; double intvl; int i, n; n = features->total; for( i = 0; i < n; i++ ) { feat = CV_GET_SEQ_ELEM( struct feature, features, i ); ddata = feat_detection_data( feat ); intvl = ddata->intvl + ddata->subintvl; feat->scl = sigma * pow( 2.0, ddata->octv + intvl / intvls ); ddata->scl_octv = sigma * pow( 2.0, intvl / intvls ); } }
函数adjust_for_img_dbl(),如果在开始的时候将图像dbl扩大了,这是就需要将特征点中的坐标值/2处理:
n = features->total; for( i = 0; i < n; i++ ) { feat = CV_GET_SEQ_ELEM( struct feature, features, i ); feat->x /= 2.0; feat->y /= 2.0; feat->scl /= 2.0; feat->img_pt.x /= 2.0; feat->img_pt.y /= 2.0; }
在计算机中真是说不清‘2’这个数字有多重要,金字塔中也到处是它:)
下面是计算特征方向的函数:
static void calc_feature_oris( CvSeq* features, IplImage*** gauss_pyr ) { struct feature* feat; struct detection_data* ddata; double* hist; double omax; int i, j, n = features->total; for( i = 0; i < n; i++ ) { feat = malloc( sizeof( struct feature ) ); cvSeqPopFront( features, feat ); ddata = feat_detection_data( feat ); hist = ori_hist( gauss_pyr[ddata->octv][ddata->intvl], ddata->r, ddata->c, SIFT_ORI_HIST_BINS, cvRound( SIFT_ORI_RADIUS * ddata->scl_octv ), SIFT_ORI_SIG_FCTR * ddata->scl_octv ); for( j = 0; j < SIFT_ORI_SMOOTH_PASSES; j++ ) smooth_ori_hist( hist, SIFT_ORI_HIST_BINS ); omax = dominant_ori( hist, SIFT_ORI_HIST_BINS ); add_good_ori_features( features, hist, SIFT_ORI_HIST_BINS, omax * SIFT_ORI_PEAK_RATIO, feat ); free( ddata ); free( feat ); free( hist ); } }
还是将每个特征取出来[函数cvSeqPopFront删除序列的头部元素],针对每个特征像素点统计其所在窗口图像中的方向梯度直方图,主要就是函数ori_hist():
直方图用一个double数组在存储,统计窗口图像中每个像素所在的dx dy ->梯度的角度和梯度的模,然后将角度转换为hist数组的index,累加值是梯度的模(距离加权后)。窗口的半径大小跟所在的尺度有关。
对得到的hist通过函数smooth_ori_hist()多次进行平滑,也是通过相加求平均的方法。
函数dominant_ori()用来求得直方图中值最大的那个方向的值,然后用这个值乘上一个系数作为阈值用于add_good_ori_fetures()函数:
static void add_good_ori_features( CvSeq* features, double* hist, int n, double mag_thr, struct feature* feat ) { struct feature* new_feat; double bin, PI2 = CV_PI * 2.0; int l, r, i; for( i = 0; i < n; i++ ) { l = ( i == 0 )? n - 1 : i-1; r = ( i + 1 ) % n; if( hist[i] > hist[l] && hist[i] > hist[r] && hist[i] >= mag_thr ) { bin = i + interp_hist_peak( hist[l], hist[i], hist[r] ); bin = ( bin < 0 )? n + bin : ( bin >= n )? bin - n : bin; new_feat = clone_feature( feat ); new_feat->ori = ( ( PI2 * bin ) / n ) - CV_PI; cvSeqPush( features, new_feat ); free( new_feat ); } } }
该函数通过遍历整个hist直方图中的所有方向,然后找到局部极值点并且满足阈值的,填充ori变量,构成新特征,然后将此特征再添加会队列中。(可能有多个满足条件的点)
----
好了至此,所有特征点的坐标、所在尺度、其方向都已经得到,就剩下构成特征向量了:
static void compute_descriptors( CvSeq* features, IplImage*** gauss_pyr, int d, int n) { struct feature* feat; struct detection_data* ddata; double*** hist; int i, k = features->total; for( i = 0; i < k; i++ ) { feat = CV_GET_SEQ_ELEM( struct feature, features, i ); ddata = feat_detection_data( feat ); hist = descr_hist( gauss_pyr[ddata->octv][ddata->intvl], ddata->r, ddata->c, feat->ori, ddata->scl_octv, d, n ); hist_to_descr( hist, d, n, feat ); release_descr_hist( &hist, d ); } }
这里的descr_hist函数也是计算特征点所在窗口中的梯度方向分布->由于需要具有旋转不变性的特征,所以这里的梯度方向是相对于上面检测出来的特征像素的主方向的角度差来统计的:
函数里面有个坐标变换和求角度差的过程
static double*** descr_hist( IplImage* img, int r, int c, double ori, double scl, int d, int n ) { double*** hist; double cos_t, sin_t, hist_width, exp_denom, r_rot, c_rot, grad_mag, grad_ori, w, rbin, cbin, obin, bins_per_rad, PI2 = 2.0 * CV_PI; int radius, i, j; hist = calloc( d, sizeof( double** ) ); for( i = 0; i < d; i++ ) { hist[i] = calloc( d, sizeof( double* ) ); for( j = 0; j < d; j++ ) hist[i][j] = calloc( n, sizeof( double ) ); } cos_t = cos( ori ); sin_t = sin( ori ); bins_per_rad = n / PI2; exp_denom = d * d * 0.5; hist_width = SIFT_DESCR_SCL_FCTR * scl; radius = hist_width * sqrt(2) * ( d + 1.0 ) * 0.5 + 0.5; for( i = -radius; i <= radius; i++ ) for( j = -radius; j <= radius; j++ ) { /* Calculate sample‘s histogram array coords rotated relative to ori. Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e. r_rot = 1.5) have full weight placed in row 1 after interpolation. */ c_rot = ( j * cos_t - i * sin_t ) / hist_width; r_rot = ( j * sin_t + i * cos_t ) / hist_width; rbin = r_rot + d / 2 - 0.5; cbin = c_rot + d / 2 - 0.5; if( rbin > -1.0 && rbin < d && cbin > -1.0 && cbin < d ) if( calc_grad_mag_ori( img, r + i, c + j, &grad_mag, &grad_ori )) { grad_ori -= ori; while( grad_ori < 0.0 ) grad_ori += PI2; while( grad_ori >= PI2 ) grad_ori -= PI2; obin = grad_ori * bins_per_rad; w = exp( -(c_rot * c_rot + r_rot * r_rot) / exp_denom ); interp_hist_entry( hist, rbin, cbin, obin, grad_mag * w, d, n ); } } return hist; }
遍历窗口中的每一个像素,获得其梯度的新的相对角度,然后在相对坐标系中重新计算梯度方向直方图。这里涉及到一个3次插值,首先新坐标中的row、col、orientation都是浮点数,对
于离散化的图像来说,并不是简单的将其转换为整型数据,要利用精确数据的小数部分对两侧的数据进行加权然后累加。总而言之就是首先获得特征点窗口图像区域的梯度方向直方图,然后
计算新的相对坐标系中的点(浮点数),利用浮点数的小数部分对原梯度直方图中的数据(row、col、ori)进行加权累加到新的梯度直方图中,详见函数interp_hist_entry():
static void interp_hist_entry( double*** hist, double rbin, double cbin, double obin, double mag, int d, int n ) { double d_r, d_c, d_o, v_r, v_c, v_o; double** row, * h; int r0, c0, o0, rb, cb, ob, r, c, o; r0 = cvFloor( rbin ); c0 = cvFloor( cbin ); o0 = cvFloor( obin ); d_r = rbin - r0; d_c = cbin - c0; d_o = obin - o0; /* The entry is distributed into up to 8 bins. Each entry into a bin is multiplied by a weight of 1 - d for each dimension, where d is the distance from the center value of the bin measured in bin units. */ for( r = 0; r <= 1; r++ ) { rb = r0 + r; if( rb >= 0 && rb < d ) { v_r = mag * ( ( r == 0 )? 1.0 - d_r : d_r ); row = hist[rb]; for( c = 0; c <= 1; c++ ) { cb = c0 + c; if( cb >= 0 && cb < d ) { v_c = v_r * ( ( c == 0 )? 1.0 - d_c : d_c ); h = row[cb]; for( o = 0; o <= 1; o++ ) { ob = ( o0 + o ) % n; v_o = v_c * ( ( o == 0 )? 1.0 - d_o : d_o ); h[ob] += v_o; } } } } } }
好了,至此得到了一个三维数组,row、col对应于窗口图像中点的坐标,ori对应于该位置上的直方图,该直方图的ori元素中的值是由相邻亚像素位置的梯度模值加权累加得到的,下面对
这个三维数组进行简单处理得到最终的特征描述descriptor:
static void hist_to_descr( double*** hist, int d, int n, struct feature* feat ) { int int_val, i, r, c, o, k = 0; for( r = 0; r < d; r++ ) for( c = 0; c < d; c++ ) for( o = 0; o < n; o++ ) feat->descr[k++] = hist[r][c][o]; feat->d = k; normalize_descr( feat ); for( i = 0; i < k; i++ ) if( feat->descr[i] > SIFT_DESCR_MAG_THR ) feat->descr[i] = SIFT_DESCR_MAG_THR; normalize_descr( feat ); /* convert floating-point descriptor to integer valued descriptor */ for( i = 0; i < k; i++ ) { int_val = SIFT_INT_DESCR_FCTR * feat->descr[i]; feat->descr[i] = MIN( 255, int_val ); } }
首先简单的将三维存储的数据一维化,然后进行归一化,处理下较大的值,再次归一化,然后将浮点类型的数据展开成0~255的整型特征,这样该特征点对应的sift特征就提取完成了。
整个过程和HOG算法有很多类似的地方,尤其是三次线性插值的过程。
--------------------
至此将适当的处理下数据类型,释放相关的中间变量之后整个_sift_features()特征提取函数就完成了。在调用该函数的时候会传入feat特征的指针,会在_sift_featrues()函数内部进行
特征内存的分配。