以上仅给出了代码。具体BP实现原理及神经网络相关知识请见:神经网络和反向传播算法推导
首先是前向传播的计算:
输入:
首先为正整数 n、m、p、t,分别代表特征个数、训练样本个数、隐藏层神经元个数、输出
层神经元个数。其中(1<n<=100,1<m<=1000, 1<p<=100, 1<t<=10)。
随后为 m 行,每行有 n+1 个数值。每行代表一个样本中的 n 个特征值 $(x_1, x_2, \ldots, x_n)$ 与样本的
实际观测结果 y。特征值的取值范围是实数范围,实际观测结果为 1 到 t 之间的正整数。
最后为 2 组特征权值矩阵初始化值。
第一组为输入层与隐藏层特征权值矩阵,矩阵大小为 p*(n+1)。
第二组为隐藏层与输出层特征权值矩阵,矩阵大小为 t*(p+1)。
输出:
包括三部分:
第一行为 1 个浮点数,是神经网络使用初始特征权值矩阵计算出的代价值 J。
然后是 m 行,每行为 p 个浮点数,神经网络隐藏层的输出(不算偏移 bias)。
最后是 m 行,每行为 t 个浮点数,神经网络输出层的输出(不算偏移 bias)。
Sample Input1:
3 3 5 3
0.084147 0.090930 0.014112 3
0.090930 0.065699 -0.053657 2
2 3 4 1
0.084147 -0.027942 -0.099999 -0.028790
0.090930 0.065699 -0.053657 -0.096140
0.014112 0.098936 0.042017 -0.075099
-0.075680 0.041212 0.099061 0.014988
-0.095892 -0.054402 0.065029 0.091295
0.084147 -0.075680 0.065699 -0.054402 0.042017 -0.028790
0.090930 -0.095892 0.098936 -0.099999 0.099061 -0.096140
0.014112 -0.027942 0.041212 -0.053657 0.065029 -0.075099
Sample Output1:
2.094661
0.518066 0.522540 0.506299 0.484257 0.476700
0.519136 0.524614 0.507474 0.483449 0.474655
0.404465 0.419895 0.509409 0.589979 0.587968
0.514583 0.511113 0.497424
0.514587 0.511139 0.497447
0.515313 0.511164 0.496748
此处需要补充说明的是:这里计算的只是含单个隐藏层的神经网络;样本标签(label)原本的值 3、2、1 表示第一个样本对应第三个输出单元的期望输出为 1,第二个样本对应第二个输出单元的期望输出为 1,第三个样本对应第一个输出单元的期望输出为 1。
python代码如下:
# coding=utf-8
"""Forward pass and cross-entropy cost of a one-hidden-layer sigmoid network.

The problem is read from a whitespace-separated text file (``test`` by
default) laid out as:

    n m p t                  feature, sample, hidden-unit, output-unit counts
    m rows: n features followed by one integer label in 1..t
    p rows: n+1 input->hidden weights (column 0 multiplies the bias input)
    t rows: p+1 hidden->output weights (column 0 multiplies the bias input)
"""
import numpy as np


def sigmoid(z):
    """Return the element-wise logistic function 1 / (1 + e^-z)."""
    return 1.0 / (1.0 + np.exp(-z))


def parse_problem(rows):
    """Split tokenised input rows into the arrays the network needs.

    Args:
        rows: list of token lists, one per non-blank input line.

    Returns:
        (features, labels, w_in_hidden, w_hidden_out) where features is
        m x n, labels has length m, w_in_hidden is p x (n+1) and
        w_hidden_out is t x (p+1).

    Raises:
        ValueError: if the header is incomplete or rows are missing.
    """
    if not rows or len(rows[0]) < 4:
        raise ValueError("first row must hold the four sizes n m p t")
    n, m, p, t = [int(token) for token in rows[0][:4]]
    if len(rows) < 1 + m + p + t:
        raise ValueError("expected %d rows, found %d"
                         % (1 + m + p + t, len(rows)))

    # The sample section holds m rows (one per sample), not n rows.
    sample_rows = rows[1:1 + m]
    # The hidden-layer weights hold p rows; the output layer holds t rows.
    hidden_rows = rows[1 + m:1 + m + p]
    output_rows = rows[1 + m + p:1 + m + p + t]

    features = np.array(
        [[float(token) for token in row[:n]] for row in sample_rows],
        dtype=float).reshape(m, n)
    labels = np.array([int(row[n]) for row in sample_rows], dtype=int)
    w_in_hidden = np.array(
        [[float(token) for token in row] for row in hidden_rows], dtype=float)
    w_hidden_out = np.array(
        [[float(token) for token in row] for row in output_rows], dtype=float)
    return features, labels, w_in_hidden, w_hidden_out


def forward_propagation(features, w_in_hidden, w_hidden_out):
    """Run every sample through the hidden layer and the output layer.

    Args:
        features: m x n sample matrix (no bias column).
        w_in_hidden: p x (n+1) weights, bias weight in column 0.
        w_hidden_out: t x (p+1) weights, bias weight in column 0.

    Returns:
        (hidden, output): the m x p hidden activations and the m x t output
        activations, both without the bias unit.
    """
    features = np.asarray(features, dtype=float)
    w_in_hidden = np.asarray(w_in_hidden, dtype=float)
    w_hidden_out = np.asarray(w_hidden_out, dtype=float)

    # One bias entry per sample, so the column length follows m, never n.
    bias = np.ones((features.shape[0], 1))
    hidden = sigmoid(np.dot(np.hstack((bias, features)), w_in_hidden.T))
    output = sigmoid(np.dot(np.hstack((bias, hidden)), w_hidden_out.T))
    return hidden, output


def compute_cost(output, labels):
    """Return the mean cross-entropy cost J over all samples.

    Args:
        output: m x t output-layer activations.
        labels: length-m integer labels; label k means output unit k
            (1-based) should be 1 and every other unit 0.

    Raises:
        ValueError: if there are no samples, the label count does not match,
            or a label lies outside 1..t.
    """
    output = np.asarray(output, dtype=float)
    labels = np.asarray(labels, dtype=int).ravel()
    sample_count, unit_count = output.shape
    if sample_count == 0 or labels.shape[0] != sample_count:
        raise ValueError("need exactly one label per sample")
    if labels.min() < 1 or labels.max() > unit_count:
        raise ValueError("labels must lie in 1..%d" % unit_count)

    # Build the one-hot targets from the parsed labels.
    target = np.zeros((sample_count, unit_count))
    target[np.arange(sample_count), labels - 1] = 1.0
    losses = -target * np.log(output) - (1.0 - target) * np.log(1.0 - output)
    return float(losses.sum() / sample_count)


def main(path='test'):
    """Read the problem from *path* and print hidden, output and cost.

    The cost is printed last, after the two activation matrices; the task
    statement lists it first.
    """
    with open(path) as source:
        rows = [line.split() for line in source if line.strip()]
    features, labels, w_in_hidden, w_hidden_out = parse_problem(rows)
    hidden, output = forward_propagation(features, w_in_hidden, w_hidden_out)
    print(hidden)   # hidden-layer activations, one row per sample
    print(output)   # output-layer activations, one row per sample
    print(compute_cost(output, labels))


if __name__ == '__main__':
    main()
此处输出顺序与题目要求不一致(先输出隐藏层和输出层的结果,最后才输出代价值 J),请忽略这个小问题。
输出结果如下:
C代码如下:(C代码)
#include <stdio.h> #include <math.h> #define MAX_SAMPLE_NUMBER 1024 #define MAX_FEATURE_DIMENSION 128 #define MAX_LABEL_NUMBER 12 double sigmoid(double z){ return 1 / (1 + exp(-z)); } double hypothesis(double x[], double theta[], int feature_number){ //此处的hypothesis计算的是某个神经元的输出 double h = 0; for (int i = 0; i <= feature_number; i++){ h += x[i] * theta[i]; } return sigmoid(h); } void forward_propagation(double a[], int feature_number, double W[][MAX_FEATURE_DIMENSION], int neuron_num, double output[]){ for (int i = 0; i < neuron_num; i++){ output[i+1] = hypothesis(a, W[i], feature_number); //w[i]对应着第i个输出神经元的上一层权值 } } double compute_cost(double X[][MAX_FEATURE_DIMENSION], int y[], int feature_number, int sample_number, double W1[][MAX_FEATURE_DIMENSION], int hidden_layer_size, double W2[][MAX_FEATURE_DIMENSION], int label_num, double a2[][MAX_FEATURE_DIMENSION], double a3[][MAX_FEATURE_DIMENSION]){ //a2为隐藏层输出a3为输出层输出w1,w2相同 double sum = 0; for (int i = 0; i < sample_number; i++){ X[i][0] = 1; forward_propagation(X[i], feature_number, W1, hidden_layer_size, a2[i]); a2[i][0] = 1; forward_propagation(a2[i], hidden_layer_size, W2, label_num, a3[i]); double yy[MAX_LABEL_NUMBER] = {0}; yy[y[i]] = 1; for (int j = 1; j <= label_num; j++){ sum += -yy[j] * log(a3[i][j]) - (1 - yy[j]) * log(1 - a3[i][j]); } } return sum / sample_number; } double X[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION]; int y[MAX_SAMPLE_NUMBER]; double W1[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION]; double W2[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION]; double a2[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION]; double a3[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION]; int main(){ int feature_number; int sample_number; int hidden_layer_size; int label_num; scanf("%d %d %d %d", &feature_number, &sample_number, &hidden_layer_size, &label_num); for (int i = 0; i < sample_number; i++){ for (int j = 1; j <= feature_number; j++){ scanf("%lf", &X[i][j]); } scanf("%d", &y[i]); } for (int i = 0; i < hidden_layer_size; i++){ 
for (int j = 0; j <= feature_number; j++){ scanf("%lf", &W1[i][j]); } } for (int i = 0; i < label_num; i++){ for (int j = 0; j <= hidden_layer_size; j++){ scanf("%lf", &W2[i][j]); } } double J = compute_cost(X, y, feature_number, sample_number, W1, hidden_layer_size, W2, label_num, a2, a3); printf("%lf\n", J); for (int i = 0; i < sample_number; i++){ for (int j = 1; j < hidden_layer_size; j++){ printf("%lf ", a2[i][j]); } printf("%lf\n", a2[i][hidden_layer_size]); } for (int i = 0; i < sample_number; i++){ for (int j = 1; j < label_num; j++){ printf("%lf ", a3[i][j]); } printf("%lf\n", a3[i][label_num]); } return 0; }
结果如下:
关于BP算法,没有找到合适的测试样例,此处仅仅给出了C++版本代码和自测数据,无验证集
C++代码:
#include <stdio.h> #include <math.h> double sigmoid(double z){ return 1 / (1 + exp(-z)); } double hypothesis(double x[], double theta[], int feature_number){ double h = 0; for (int i = 0; i <= feature_number; i++){ h += x[i] * theta[i]; } return h; } #define MAX_FEATURE_DIMENSION 128 #define MAX_LABEL_NUMBER 12 void forward_propagation(double input[], int feature_number, double W[][MAX_FEATURE_DIMENSION], int neuron_num, double z[], double a[]){ for (int i = 0; i < neuron_num; i++){ z[i+1] = hypothesis(input, W[i], feature_number); a[i+1] = sigmoid(z[i+1]); //加1的原因是第一个要留作补充的神经元 } } double sigmoid_gradient(double z){ return sigmoid(z) * (1 - sigmoid(z)); //对sigmoid函数求导可以化成如此形式,要注意的是z才是自变量 } void compute_layer_error(double layer_error[], double W[][MAX_FEATURE_DIMENSION], int neuron_num, int feature_number, double next_layer_error[], double z[]){ //此处计算的是theta(l)具体见上一篇博文 for (int i = 1; i <= feature_number; i++){ for (int j = 0; j < neuron_num; j++){ layer_error[i] += W[j][i] * next_layer_error[j + 1];//next_layer_error[j + 1]=theta(l+1) } } for (int i = 1; i <=feature_number; i++){ layer_error[i] = layer_error[i] * sigmoid_gradient(z[i]); } } void accumulate_gradient(double sum[][MAX_FEATURE_DIMENSION], double layer_error[], int neuron_num, int feature_number, double a[]){ //计算误差总和 for (int i = 0; i < neuron_num; i++){ for (int j = 0; j <= feature_number; j++){ sum[i][j] += layer_error[i+1] * a[j]; } } } void compute_gradient(double X[][MAX_FEATURE_DIMENSION], int y[], int feature_number, int sample_number, double W1[][MAX_FEATURE_DIMENSION], int hidden_layer_size, double W2[][MAX_FEATURE_DIMENSION], int label_num, double w1_grad[][MAX_FEATURE_DIMENSION], double w2_grad[][MAX_FEATURE_DIMENSION]){ double grad1_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0}; double grad2_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0}; for (int i = 0; i < sample_number; i++){ X[i][0] = 1; double z2[MAX_FEATURE_DIMENSION] = {0, 0}; double a2[MAX_FEATURE_DIMENSION] = 
{1, 0}; forward_propagation(X[i], feature_number, W1, hidden_layer_size, z2, a2); double z3[MAX_FEATURE_DIMENSION] = {0}; double a3[MAX_FEATURE_DIMENSION] = {0}; forward_propagation(a2, hidden_layer_size, W2, label_num, z3, a3); double yy[MAX_LABEL_NUMBER] = {0}; yy[y[i]] = 1; double layer3_error[MAX_FEATURE_DIMENSION] = {0}; for (int j = 1; j <= label_num; j++){ layer3_error[j] = a3[j] - yy[j]; } double layer2_error[MAX_FEATURE_DIMENSION] = {0}; compute_layer_error(layer2_error, W2, label_num, hidden_layer_size, layer3_error, z2); accumulate_gradient(grad2_sum, layer3_error, label_num, hidden_layer_size, a2); accumulate_gradient(grad1_sum, layer2_error, hidden_layer_size, feature_number, X[i]); } for (int i = 0; i < hidden_layer_size; i++){ for (int j = 0; j <= feature_number; j++){ w1_grad[i][j] = grad1_sum[i][j] / sample_number; } } for (int i = 0; i < label_num; i++){ for (int j = 0; j <= hidden_layer_size; j++){ w2_grad[i][j] = grad2_sum[i][j] / sample_number; } } } int main(){ double X[][MAX_FEATURE_DIMENSION] = { {0, 0.084147, 0.090930}, {0, 0.090930, 0.065699}, {0, 2, 3} }; int y[] = {1, 2, 2}; int hidden_layer_size = 4; int label_num = 2; int feature_number = 2; int sample_number = 3; double W1[][MAX_FEATURE_DIMENSION] = { {0.084147, -0.027942, -0.099999}, {0.090930, 0.065699, -0.053657}, {0.014112, 0.098936, 0.042017}, {-0.075680, 0.041212, 0.099061}, }; double W2[][MAX_FEATURE_DIMENSION] = { {0.084147, -0.075680, 0.065699, -0.054402, 0.042017}, {0.090930, -0.095892, 0.098936, -0.099999, 0.099061} }; double a2[10][MAX_FEATURE_DIMENSION] = {0}; double a3[10][MAX_FEATURE_DIMENSION] = {0}; double w1_grad[10][MAX_FEATURE_DIMENSION] = {0}; double w2_grad[10][MAX_FEATURE_DIMENSION] = {0}; compute_gradient(X, y, feature_number, 3, W1, hidden_layer_size, W2, label_num, w1_grad, w2_grad); printf("w1_grad:\n"); for (int i = 0; i < hidden_layer_size; i++){ for (int j = 0; j <= feature_number; j++){ printf("%lf ", w1_grad[i][j]); } printf("\n"); } 
printf("w2_grad:\n"); for (int i = 0; i < label_num; i++){ for (int j = 0; j <= hidden_layer_size; j++){ printf("%lf ", w2_grad[i][j]); } printf("\n"); } return 0; }
运行截图: