This is the last part of the second assignment.
First, we import the necessary functions and define a helper that computes the relative error.
def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
The function above computes the relative error between two arrays; by the triangle inequality it lies between 0 (identical) and 1.
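As a quick illustration (the values below are made up, and numpy is assumed to be imported as np as in the notebook), two arrays that differ by a tiny amount give a correspondingly tiny relative error:

a = np.array([1.0, 2.0, 3.0])
b = np.array([1.0, 2.0 + 1e-6, 3.0])
print rel_error(a, b)   # on the order of 1e-7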
Load the data and print the shapes of the arrays:
X_val:   (1000, 3, 32, 32)
X_train: (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:   (1000,)
y_train: (49000,)
y_test:  (1000,)
The functions below are where we fill in our own code (the TODO sections).
Forward pass of the convolution layer
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each filter
    spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    out = None
    #############################################################################
    # TODO: Implement the convolutional forward pass.                           #
    # Hint: you can use the function np.pad for padding.                        #
    #############################################################################
    N, C, H, W = x.shape                  # input dimensions
    F, C, HH, WW = w.shape                # filter dimensions
    stride = conv_param['stride']         # stride: how many pixels the filter moves per step
    num_pad = conv_param['pad']           # pad: zero-padding keeps the output from shrinking
    H_p = 1 + (H + 2 * num_pad - HH) // stride   # output height
    W_p = 1 + (W + 2 * num_pad - WW) // stride   # output width
    # np.pad takes, per dimension, the amount of padding to add on each side
    x_pad = np.pad(x, ((0, 0), (0, 0), (num_pad, num_pad), (num_pad, num_pad)), 'constant')
    out = np.zeros((N, F, H_p, W_p))
    for i in range(N):                    # each image
        for j in range(F):                # each filter
            for ii in range(H_p):         # each output row
                for jj in range(W_p):     # each output column
                    # each output value is the dot product of the filter with the
                    # corresponding window of the padded input, plus the bias
                    out[i, j, ii, jj] = np.sum(
                        x_pad[i, :, ii*stride:ii*stride+HH, jj*stride:jj*stride+WW] * w[j, :, :, :]) + b[j]
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    cache = (x, w, b, conv_param)
    return out, cache
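A quick shape check of the output-size formula (the sizes below are made up, not taken from the notebook): a 4x4 input with a 4x4 filter, pad 1 and stride 2 should give a 2x2 output, since 1 + (4 + 2*1 - 4)/2 = 2.

x = np.random.randn(2, 3, 4, 4)
w = np.random.randn(3, 3, 4, 4)
b = np.random.randn(3)
conv_param = {'stride': 2, 'pad': 1}
out, _ = conv_forward_naive(x, w, b, conv_param)
print out.shape   # expected: (2, 3, 2, 2)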
The effect of convolution on images (here the notebook applies grayscale and edge-detection filters to sample images to visualize what a convolution does)
Backward pass of the convolution layer
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    #############################################################################
    # TODO: Implement the convolutional backward pass.                          #
    #############################################################################
    x, w, b, conv_param = cache           # unpack the cached values
    num_pad = conv_param['pad']
    stride = conv_param['stride']
    N, C, H, W = x.shape                  # recover the dimensions
    F, C, HH, WW = w.shape
    H_p = 1 + (H + 2 * num_pad - HH) // stride
    W_p = 1 + (W + 2 * num_pad - WW) // stride

    # b is added at every output position of its filter, so its gradient is the
    # sum of dout over all images and all spatial positions
    db = np.sum(dout, axis=(0, 2, 3))

    x_pad = np.pad(x, ((0, 0), (0, 0), (num_pad, num_pad), (num_pad, num_pad)), 'constant')

    # dw: each output value came from a window of x_pad times the filter, so that
    # window, scaled by the upstream gradient, accumulates into dw.
    # dx: symmetrically, the filter, scaled by the upstream gradient, accumulates
    # into the corresponding window of the (padded) input gradient.
    dw = np.zeros_like(w)
    dx_pad = np.zeros_like(x_pad)
    for i in range(N):
        for j in range(F):
            for ii in range(H_p):
                for jj in range(W_p):
                    window = x_pad[i, :, ii*stride:ii*stride+HH, jj*stride:jj*stride+WW]
                    dw[j, :, :, :] += window * dout[i, j, ii, jj]
                    dx_pad[i, :, ii*stride:ii*stride+HH, jj*stride:jj*stride+WW] += w[j, :, :, :] * dout[i, j, ii, jj]

    # strip the padding to obtain the gradient with respect to the original input
    dx = dx_pad[:, :, num_pad:H+num_pad, num_pad:W+num_pad]
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    return dx, dw, db
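A hedged sketch of how these gradients can be verified numerically, assuming eval_numerical_gradient_array is available from cs231n.gradient_check as in the assignment (the shapes below are made up):

from cs231n.gradient_check import eval_numerical_gradient_array

x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2)
conv_param = {'stride': 1, 'pad': 1}
dout = np.random.randn(4, 2, 5, 5)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
print 'dw error: ', rel_error(dw, dw_num)   # should be very small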
Max pooling forward pass
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    out = None
    #############################################################################
    # TODO: Implement the max pooling forward pass                              #
    #############################################################################
    N, C, H, W = x.shape                          # input dimensions
    pool_height = pool_param['pool_height']       # pooling window height
    pool_width = pool_param['pool_width']         # pooling window width
    stride = pool_param['stride']                 # distance between windows
    H_out = 1 + (H - pool_height) // stride       # output height
    W_out = 1 + (W - pool_width) // stride        # output width
    out = np.zeros((N, C, H_out, W_out))
    for i in range(N):
        for j in range(C):
            for ii in range(H_out):
                for jj in range(W_out):
                    # max pooling: each output value is the maximum of its window
                    out[i, j, ii, jj] = np.max(
                        x[i, j, ii*stride:ii*stride+pool_height, jj*stride:jj*stride+pool_width])
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    cache = (x, pool_param)
    return out, cache
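A quick shape check (made-up sizes): 2x2 pooling with stride 2 halves both spatial dimensions.

x = np.random.randn(2, 3, 8, 8)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
out, _ = max_pool_forward_naive(x, pool_param)
print out.shape   # expected: (2, 3, 4, 4)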
Max pooling backward pass
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    dx = None
    #############################################################################
    # TODO: Implement the max pooling backward pass                             #
    #############################################################################
    x, pool_param = cache
    N, C, H, W = x.shape
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']
    H_out = 1 + (H - pool_height) // stride
    W_out = 1 + (W - pool_width) // stride
    dx = np.zeros_like(x)
    for i in range(N):
        for j in range(C):
            for ii in range(H_out):
                for jj in range(W_out):
                    # only the max element of each window contributed to the
                    # output, so it alone receives the upstream gradient
                    window = x[i, j, ii*stride:ii*stride+pool_height, jj*stride:jj*stride+pool_width]
                    posi = np.where(window == np.max(window))
                    dx[i, j, ii*stride:ii*stride+pool_height, jj*stride:jj*stride+pool_width][posi] += dout[i, j, ii, jj]
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    return dx
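A tiny illustration of the gradient routing (the values here are made up): only the max entry of each pooling window receives the upstream gradient.

x = np.array([[[[1., 3.],
                [2., 0.]]]])          # shape (1, 1, 2, 2)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
_, cache = max_pool_forward_naive(x, pool_param)
dout = np.array([[[[5.]]]])           # shape (1, 1, 1, 1)
print max_pool_backward_naive(dout, cache)
# expected: only the position of the 3 gets the gradient 5, all others stay 0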
Fast implementation of convolution (as opposed to the naive implementation above); the fast max pooling version works the same way.
t0 = time()
out_naive, cache_naive = conv_forward_naive(x, w, b, conv_param)
t1 = time()
out_fast, cache_fast = conv_forward_strides(x, w, b, conv_param)
t2 = time()
# the block above times the two forward implementations

t0 = time()
dx_naive, dw_naive, db_naive = conv_backward_naive(dout, cache_naive)
t1 = time()
dx_fast, dw_fast, db_fast = conv_backward_strides(dout, cache_fast)
t2 = time()
# and this block times the two backward implementations
By recording t0, t1, and t2 we can measure how long each call takes; note this timing pattern, sketched below.
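A sketch of how the elapsed times are typically printed from the variables above (the exact messages in the notebook may differ):

print 'naive: %fs' % (t1 - t0)
print 'fast:  %fs' % (t2 - t1)
print 'speedup: %.1fx' % ((t1 - t0) / (t2 - t1))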
Combine several sublayers (conv, ReLU, max pooling) into a single composite layer and check its gradients numerically.
from cs231n.layer_utils import conv_relu_pool_forward, conv_relu_pool_backward

x = np.random.randn(2, 3, 16, 16)
w = np.random.randn(3, 3, 3, 3)
b = np.random.randn(3,)
dout = np.random.randn(2, 3, 8, 8)
conv_param = {'stride': 1, 'pad': 1}
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

out, cache = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
dx, dw, db = conv_relu_pool_backward(dout, cache)

# note the lambda functions: they fix all arguments except the one being checked
dx_num = eval_numerical_gradient_array(lambda x: conv_relu_pool_forward(x, w, b, conv_param, pool_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_relu_pool_forward(x, w, b, conv_param, pool_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_relu_pool_forward(x, w, b, conv_param, pool_param)[0], b, dout)

print 'Testing conv_relu_pool'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)
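For reference, a hedged sketch of how such a composite layer is typically built from the primitives above. The real functions live in cs231n/layer_utils.py, and relu_forward / relu_backward are assumed to come from cs231n/layers.py; the actual implementation may differ in details:

def conv_relu_pool_forward_sketch(x, w, b, conv_param, pool_param):
    # conv -> relu -> max pool, keeping each sublayer's cache for the backward pass
    a, conv_cache = conv_forward_naive(x, w, b, conv_param)
    s, relu_cache = relu_forward(a)
    out, pool_cache = max_pool_forward_naive(s, pool_param)
    cache = (conv_cache, relu_cache, pool_cache)
    return out, cache

def conv_relu_pool_backward_sketch(dout, cache):
    # unpack the caches and apply the sublayer backward passes in reverse order
    conv_cache, relu_cache, pool_cache = cache
    ds = max_pool_backward_naive(dout, pool_cache)
    da = relu_backward(ds, relu_cache)
    dx, dw, db = conv_backward_naive(da, conv_cache)
    return dx, dw, db

Storing the per-sublayer caches in a tuple is what lets the backward function simply unpack them and chain the individual backward passes.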
Build a three-layer convolutional network (conv - relu - 2x2 max pool - affine - relu - affine - softmax)
Sanity-check the initial loss
model = ThreeLayerConvNet()

N = 50
X = np.random.randn(N, 3, 32, 32)
y = np.random.randint(10, size=N)

loss, grads = model.loss(X, y)
print 'Initial loss (no regularization): ', loss

model.reg = 0.5
loss, grads = model.loss(X, y)
print 'Initial loss (with regularization): ', loss
The initial loss (without regularization) should be around 2.3.
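Why roughly 2.3: with small random weights the softmax outputs are close to uniform over the 10 classes, so the expected cross-entropy loss is about -log(1/10):

print -np.log(1.0 / 10)   # ~2.3026

With regularization turned on (reg = 0.5), the loss should come out slightly larger than this.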
Gradient check
num_inputs = 2
input_dim = (3, 16, 16)
reg = 0.0
num_classes = 10
X = np.random.randn(num_inputs, *input_dim)
y = np.random.randint(num_classes, size=num_inputs)

model = ThreeLayerConvNet(num_filters=3, filter_size=3,
                          input_dim=input_dim, hidden_dim=7,
                          dtype=np.float64)
loss, grads = model.loss(X, y)
for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print '%s max relative error: %e' % (param_name, e)
Overfit a small dataset: train on just 100 examples and confirm the network can reach very high training accuracy while validation accuracy stays low.
num_train = 100
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)

solver = Solver(model, small_data,
                num_epochs=20, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 2e-4,
                },
                verbose=True, print_every=1)
solver.train()
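After training, the Solver keeps loss and accuracy histories that can be plotted to confirm the overfitting. The attribute names below follow the cs231n Solver and should be treated as an assumption; a plotting sketch:

import matplotlib.pyplot as plt

plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()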