原理可以参考
https://ujjwalkarn.me/2016/08/11/intuitive-explanation-convnets/
以及《神经网络与深度学习》
上代码:
import tensorflow as tf
from tqdm import tqdm_notebook
from tensorflow.examples.tutorials.mnist import input_data

"""
the cnn we are going to make:
conv->relu->pool->affine->relu->affine->softmax
"""


def relu(X):
    """Return the element-wise maximum of X and zero."""
    return tf.maximum(X, tf.zeros_like(X))


def affine(X, W, b):
    """Apply a fully connected layer: flatten X per sample, then X @ W + b.

    The first dimension of X must be statically known; it is used as the
    number of samples when flattening.
    """
    n = X.get_shape()[0].value  # number of samples
    X_flat = tf.reshape(X, [n, -1])
    return tf.matmul(X_flat, W) + b


def flatten(X, window_h, window_w, window_c, out_h, out_w, stride=1, padding=0):
    """Unroll every sliding window of a 4-D tensor into a row of a 2-D tensor.

    X is zero-padded by `padding` on both sides of axes 1 and 2, then one
    window of size window_h x window_w (all samples, all channels) is sliced
    for each of the out_h * out_w output positions.

    Returns a tensor of shape [-1, window_h * window_w * window_c] whose rows
    are ordered by output row, then output column, then sample.
    """
    X_padded = tf.pad(X, [[0, 0], [padding, padding], [padding, padding], [0, 0]])

    windows = []
    for y in range(out_h):
        for x in range(out_w):
            window = tf.slice(
                X_padded,
                [0, y * stride, x * stride, 0],
                [-1, window_h, window_w, -1],
            )
            windows.append(window)

    # shape: [out_h * out_w, n, window_h, window_w, c]
    stacked = tf.stack(windows)
    return tf.reshape(stacked, [-1, window_c * window_w * window_h])


def max_pool(X, pool_h, pool_w, padding, stride):
    """Max-pool a 4-D tensor over pool_h x pool_w windows.

    All four dimensions of X must be statically known. Returns a tensor of
    shape [n, out_h, out_w, c].

    NOTE: padding is zero-filled (tf.pad default), so with padding > 0 the
    border windows compare their inputs against 0.
    """
    n, h, w, c = [d.value for d in X.get_shape()]
    out_h = (h + 2 * padding - pool_h) // stride + 1
    out_w = (w + 2 * padding - pool_w) // stride + 1

    X_flat = flatten(X, pool_h, pool_w, c, out_h, out_w, stride, padding)
    # Separate the window axis from the channel axis so the max is taken
    # over window positions only, independently for each channel.
    windows = tf.reshape(X_flat, [out_h, out_w, n, pool_h * pool_w, c])
    pool = tf.reduce_max(windows, axis=3)
    return tf.transpose(pool, [2, 0, 1, 3])


def convolution(X, W, b, padding, stride):
    """Convolve X with the filters W and add the bias b.

    W has shape [filter_h, filter_w, filter_c, filter_n] and b has one entry
    per filter. All dimensions of X and W must be statically known. Returns a
    tensor of shape [n, out_h, out_w, filter_n].
    """
    n, h, w, c = [d.value for d in X.get_shape()]
    filter_h, filter_w, filter_c, filter_n = [d.value for d in W.get_shape()]
    out_h = (h + 2 * padding - filter_h) // stride + 1
    out_w = (w + 2 * padding - filter_w) // stride + 1

    # Convolution as one matrix product: each unrolled window (row of X_flat)
    # times each unrolled filter (column of W_flat).
    X_flat = flatten(X, filter_h, filter_w, filter_c, out_h, out_w, stride, padding)
    W_flat = tf.reshape(W, [filter_h * filter_w * filter_c, filter_n])
    z = tf.matmul(X_flat, W_flat) + b  # b: 1 X filter_n

    return tf.transpose(tf.reshape(z, [out_h, out_w, n, filter_n]), [2, 0, 1, 3])


def softmax(X):
    """Return the row-wise softmax of a 2-D tensor.

    Each row is shifted by its own maximum before exponentiation. This avoids
    overflow, and (unlike shifting by the global maximum) guarantees that
    every row keeps at least one exp() equal to 1, so the row sum can never
    underflow to zero and produce NaN.
    """
    row_max = tf.expand_dims(tf.reduce_max(X, axis=1), 1)
    X_exp = tf.exp(X - row_max)
    exp_sum = tf.expand_dims(tf.reduce_sum(X_exp, axis=1), 1)
    return X_exp / exp_sum


def accuracy(network, t):
    """Return the fraction of rows where argmax(network) equals argmax(t)."""
    t_predict = tf.argmax(network, axis=1)
    t_actual = tf.argmax(t, axis=1)
    return tf.reduce_mean(tf.cast(tf.equal(t_predict, t_actual), tf.float32))


def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions y and one-hot labels t.

    A small epsilon is added inside the log so that a predicted probability
    of exactly 0 for the true class yields a large finite loss instead of
    -inf / NaN.
    """
    epsilon = 1e-7
    return -tf.reduce_mean(tf.log(tf.reduce_sum(y * t, axis=1) + epsilon))


# Load data (images keep their [28, 28, 1] shape, labels are one-hot).
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True, reshape=False)

# Set batch size.
batch_size = 100

# Get the first batch; it is used below to inspect intermediate layers.
example_X, example_ys = mnist.train.next_batch(batch_size)

# Create the session. InteractiveSession installs itself as the default
# session, which is what the bare .eval() / .run() calls below rely on.
session = tf.InteractiveSession()
try:
    X = tf.placeholder('float', [batch_size, 28, 28, 1])
    t = tf.placeholder('float', [batch_size, 10])

    # conv -> relu -> pool
    filter_h, filter_w, filter_c, filter_n = 5, 5, 1, 30
    W1 = tf.Variable(tf.random_normal([filter_h, filter_w, filter_c, filter_n], stddev=0.01))
    b1 = tf.Variable(tf.zeros([filter_n]))

    conv_layer = convolution(X, W1, b1, padding=2, stride=1)
    conv_activation_layer = relu(conv_layer)
    pooling_layer = max_pool(conv_activation_layer, pool_h=2, pool_w=2, padding=0, stride=2)

    # Only the spatial size is needed; batch size and filter count are
    # already known and are left untouched.
    _, pool_output_h, pool_output_w, _ = [d.value for d in pooling_layer.get_shape()]

    # affine -> relu
    # number of nodes in the hidden layer
    hidden_size = 100
    W2 = tf.Variable(tf.random_normal([pool_output_h * pool_output_w * filter_n, hidden_size], stddev=0.01))
    b2 = tf.Variable(tf.zeros([hidden_size]))
    affine_layer1 = affine(pooling_layer, W2, b2)

    init = tf.global_variables_initializer()
    init.run()

    # Notebook-style inspection: the value is shown as cell output in a
    # notebook and discarded when run as a script.
    affine_layer1.eval({X: example_X, t: example_ys})[0]

    affine_activation_layer1 = relu(affine_layer1)
    affine_activation_layer1.eval({X: example_X, t: example_ys})[0]

    # affine -> softmax
    output_size = 10
    W3 = tf.Variable(tf.random_normal([hidden_size, output_size], stddev=0.01))
    b3 = tf.Variable(tf.zeros([output_size]))
    affine_layer2 = affine(affine_activation_layer1, W3, b3)

    # Run the initializer again so the newly created W3 / b3 are initialized
    # (this also re-initializes the earlier variables; no training has
    # happened yet).
    init = tf.global_variables_initializer()
    init.run()

    affine_layer2.eval({X: example_X, t: example_ys})[0]

    softmax_layer = softmax(affine_layer2)
    softmax_layer.eval({X: example_X, t: example_ys})[0]

    loss = cross_entropy_error(softmax_layer, t)
    loss.eval({X: example_X, t: example_ys})

    learning_rate = 0.1
    trainer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # number of times to iterate over training data
    training_epochs = 2

    # number of batches
    num_batch = int(mnist.train.num_examples / batch_size)

    for epoch in range(training_epochs):
        avg_cost = 0
        for _ in range(num_batch):
            train_X, train_ys = mnist.train.next_batch(batch_size)
            trainer.run(feed_dict={X: train_X, t: train_ys})
            # The loss is evaluated after the update step, on the same batch.
            avg_cost += loss.eval(feed_dict={X: train_X, t: train_ys}) / num_batch

        print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost), flush=True)

    # The placeholders have a fixed batch dimension, so evaluation uses the
    # first batch_size test samples only.
    test_x = mnist.test.images[:batch_size]
    test_t = mnist.test.labels[:batch_size]
    accuracy(softmax_layer, t).eval(feed_dict={X: test_x, t: test_t})
finally:
    # Release the session even if graph construction or training fails.
    session.close()
时间: 2024-10-23 16:54:59